diff --git a/config/ruff.toml b/config/ruff.toml index 92f0d06..935bf10 100644 --- a/config/ruff.toml +++ b/config/ruff.toml @@ -6,6 +6,7 @@ include = [ "src/readii/feature_extraction.py", "src/readii/cli/**/*.py", "src/readii/negative_controls_refactor/**.py", + "src/readii/io/writers/**.py", ] # extend-exclude is used to exclude directories from the flake8 checks @@ -16,7 +17,9 @@ extend-exclude = [ "src/readii/image_processing.py", "src/readii/metadata.py", "src/readii/negative_controls.py", - "src/readii/pipeline.py",] + "src/readii/pipeline.py", + "notebooks/*", +] # Same as Black. line-length = 100 diff --git a/notebooks/nifti_writer_example.ipynb b/notebooks/nifti_writer_example.ipynb new file mode 100644 index 0000000..685c59a --- /dev/null +++ b/notebooks/nifti_writer_example.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example subclass for writing NIFTI files" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import the NIFTIWriter class created in READII along with other necessary imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from readii.io.writers.nifti_writer import NIFTIWriter\n", + "from readii.io.writers.base_writer import BaseWriter\n", + "from pathlib import Path\n", + "import subprocess\n", + "import SimpleITK as sitk\n", + "import pandas as pd\n", + "import uuid\n", + "import random\n", + "import sys\n", + "from readii.utils import logger" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a writer subclass for writing .csv files" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# copy this writer from the other notebook:\n", + "class CSVWriter(BaseWriter): # noqa\n", + "\n", + " # The save method is the only method that needs to be implemented for the subclasses of BaseWriter\n", 
+ " def save(self, data: list, **kwargs) -> Path: # noqa\n", + " output_path = self.resolve_path(**kwargs)\n", + " with output_path.open('w') as f: # noqa\n", + " pd.DataFrame(data).to_csv(f, index=False)\n", + " return output_path\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Show how the NIFTI Writer can be used on SimpleITK images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TRASH/writer_examples/nifti_writer_examples/\n", + "├── PatientID-AliceSmith/\n", + "│   ├── AliceSmith_metadata.csv\n", + "│   └── Study-Study003/\n", + "│   ├── CT/\n", + "│   │   └── CT_SeriesUID-13278.nii.gz\n", + "│   └── RTSTRUCT/\n", + "│   └── RTSTRUCT_SeriesUID-39256.nii.gz\n", + "├── PatientID-JaneDoe/\n", + "│   ├── JaneDoe_metadata.csv\n", + "│   └── Study-Study002/\n", + "│   ├── CT/\n", + "│   │   └── CT_SeriesUID-24592.nii.gz\n", + "│   └── RTSTRUCT/\n", + "│   └── RTSTRUCT_SeriesUID-42098.nii.gz\n", + "└── PatientID-JohnAdams/\n", + " ├── JohnAdams_metadata.csv\n", + " └── Study-Study001/\n", + " ├── CT/\n", + " │   └── CT_SeriesUID-93810.nii.gz\n", + " └── RTSTRUCT/\n", + " └── RTSTRUCT_SeriesUID-46048.nii.gz\n", + "\n", + "13 directories, 9 files\n", + "\n" + ] + } + ], + "source": [ + "ROOT_DIRECTORY = Path(\"TRASH\", \"writer_examples\", \"nifti_writer_examples\")\n", + "IMAGE_FILENAME_FORMAT = \"PatientID-{PatientID}/Study-{Study}/{Modality}/{Modality}_SeriesUID-{SeriesUID}\"\n", + "METADATA_FILENAME_FORMAT = \"PatientID-{PatientID}/{PatientID}\"\n", + "\n", + "data_sets = []\n", + "random.seed(42) # Set random seed for reproducibility\n", + "\n", + "random_5d = lambda: random.randint(10000, 99999)\n", + "\n", + "# Set up some dummy images to save as NIFTI files\n", + "for MODALITY in [\"CT\", \"RTSTRUCT\"]:\n", + " data_sets.extend([\n", + " {\n", + " \"image\": sitk.Image(10, 10, 10, sitk.sitkInt16),\n", + " 
\"metadata\": pd.DataFrame({\"PatientID\": [\"JohnAdams\"], \"Study\": [\"Study001\"]}),\n", + " \"PatientID\": \"JohnAdams\",\n", + " \"Study\": \"Study001\",\n", + " \"Modality\": MODALITY,\n", + " \"SeriesUID\": random_5d(),\n", + " },\n", + " {\n", + " \"image\": sitk.Image(20, 20, 20, sitk.sitkInt16),\n", + " \"metadata\": pd.DataFrame({\"PatientID\": [\"JaneDoe\"], \"Study\": [\"Study002\"]}),\n", + " \"PatientID\": \"JaneDoe\",\n", + " \"Study\": \"Study002\",\n", + " \"Modality\": MODALITY,\n", + " \"SeriesUID\": random_5d(),\n", + " },\n", + " {\n", + " \"image\": sitk.Image(30, 30, 30, sitk.sitkInt16),\n", + " \"metadata\": pd.DataFrame({\"PatientID\": [\"AliceSmith\"], \"Study\": [\"Study003\"]}),\n", + " \"PatientID\": \"AliceSmith\",\n", + " \"Study\": \"Study003\",\n", + " \"Modality\": MODALITY,\n", + " \"SeriesUID\": random_5d(),\n", + " }\n", + " ])\n", + "\n", + "# Create a writer with the specified root directory and filename format\n", + "with (\n", + " NIFTIWriter(\n", + " root_directory=ROOT_DIRECTORY, \n", + " filename_format=f\"{IMAGE_FILENAME_FORMAT}.nii.gz\",\n", + " overwrite=True\n", + " ) as nifti_writer,\n", + " CSVWriter(\n", + " root_directory=ROOT_DIRECTORY, \n", + " filename_format=f\"{METADATA_FILENAME_FORMAT}_metadata.csv\",\n", + " ) as metadata_writer\n", + "):\n", + " # Iterate over the data sets and save them\n", + " for data_set in data_sets:\n", + "\n", + " # The actual data being saved is image or data, but the rest of the kwargs are \n", + " # only for resolving the filename\n", + " try:\n", + " nifti_writer.save(\n", + " image=data_set[\"image\"],\n", + " PatientID=data_set[\"PatientID\"],\n", + " Study=data_set[\"Study\"],\n", + " Modality=data_set[\"Modality\"],\n", + " SeriesUID=data_set[\"SeriesUID\"]\n", + " )\n", + " metadata_writer.save(\n", + " data=data_set[\"metadata\"],\n", + " PatientID=data_set[\"PatientID\"],\n", + " Study=data_set[\"Study\"],\n", + " Modality=data_set[\"Modality\"],\n", + " 
SeriesUID=data_set[\"SeriesUID\"]\n", + " )\n", + " except FileExistsError as e:\n", + " logger.exception(f\"Error saving data set: {e}\")\n", + " sys.exit(1)\n", + "\n", + "output = subprocess.check_output([\"tree\", \"-nF\", ROOT_DIRECTORY])\n", + "print(output.decode(\"utf-8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/nifti_writer_example.pdf b/notebooks/nifti_writer_example.pdf new file mode 100644 index 0000000..876a60a Binary files /dev/null and b/notebooks/nifti_writer_example.pdf differ diff --git a/notebooks/writer_examples.ipynb b/notebooks/writer_examples.ipynb new file mode 100644 index 0000000..4617515 --- /dev/null +++ b/notebooks/writer_examples.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Writing Files with a BaseWriter Subclass" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from readii.io.writers.base_writer import BaseWriter\n", + "from readii.utils import logger\n", + "import SimpleITK as sitk\n", + "from pathlib import Path\n", + "import json\n", + "from typing import Any" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a subclass of BaseWriter for writing text files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + 
"source": [ + "# Example subclass for writing text files\n", + "# Define a concrete subclass of BaseWriter that will handle the saving of a specific file type\n", + "# this is a simple example with no validation or error handling\n", + "class TextWriter(BaseWriter):\n", + " def save(self, content: str, **kwargs: Any) -> Path:\n", + " output_path = self.resolve_path(**kwargs)\n", + " with open(output_path, 'w') as f:\n", + " f.write(content)\n", + " return output_path" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating and using different writers for different filename patterns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "example_file_formats = [\n", + " # a placeholder can be of the format {key} or %key\n", + " # {key} is useful for python code, whereas %key is useful for use in CLI or bash scripts where using {} would be problematic\n", + " \"notes_%SubjectID.txt\",\n", + " \"notes_{SubjectID}.txt\",\n", + "\n", + " # You define the placeholder that you will later pass in as a keyword argument in the save method\n", + " # By default, the writer automatically generates data for the current \"date\", \"time\", and \"date_time\" \n", + " # so those can be used as placeholders\n", + " # Every other placeholder needs to be passed in as a keyword argument in the save method\n", + " \"important-file-name_{SubjectID}_{date}.txt\",\n", + " \"subjects/{SubjectID}/{time}_result.txt\",\n", + " \"subjects/{SubjectID}_Birthday-{SubjectBirthDate}/data_{date_time}.txt\",\n", + "]\n", + "\n", + "# Create text writers with different filename patterns\n", + "text_writers = [\n", + " TextWriter(\n", + " root_directory=\"TRASH/writer_examples/text_data\",\n", + " filename_format=fmt\n", + " ) for fmt in example_file_formats\n", + "]\n", + "\n", + "# Define some example data to pass to the writers\n", + "# this could be extracted from some data source and used to generate the 
file names\n", + "SubjectID=\"SUBJ001\"\n", + "SubjectBirthDate=\"2022-01-01\"\n", + "\n", + "# Test text writers\n", + "for writer in text_writers:\n", + " path = writer.save(\n", + " content = \"Sample text content\", # this is the data that will be written to the file\n", + "\n", + " # The key-value pairs can be passed in as keyword arguments, and matched to placeholders in the filename format\n", + " SubjectID=SubjectID, \n", + " SubjectBirthDate=SubjectBirthDate,\n", + "\n", + " # If you pass in a key that is not in the filename format, it will be ignored\n", + " # this can also be seen as `SubjectBirthDate` is only used in one of the above filename formats\n", + " RandomKey=\"This will be ignored\",\n", + " RandomKey2=\"This will also be ignored\"\n", + " )\n", + " print(f\"{writer.__class__.__name__} with format [magenta]'{writer.pattern_resolver.formatted_pattern}':\")\n", + " print(f\"File written to: [green]{path}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# More detailed example" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess\n", + "import pandas as pd\n", + "\n", + "# Any subclass has to be initialized with a root directory and a filename format\n", + "# which might not be obvious at first\n", + "\n", + "class CSVWriter(BaseWriter): # noqa\n", + "\n", + " # The save method is the only method that needs to be implemented for the subclasses of BaseWriter\n", + " def save(self, data: list, **kwargs: Any) -> Path: # noqa\n", + " output_path = self.resolve_path(**kwargs)\n", + " with output_path.open('w') as f: # noqa\n", + " pd.DataFrame(data).to_csv(f, index=False)\n", + " return output_path\n", + "\n", + "# Make some fake data\n", + "subject_data_examples = [\n", + " {\n", + " \"PatientID\": f\"PAT{i:03d}\",\n", + " \"Modality\": f\"{MODALITY}\",\n", + " \"Study\": f\"Study{j:03d}\",\n", + " \"DataType\": f\"{DATA_TYPE}\",\n", + " 
}\n", + " for i in range(1, 4)\n", + " for j in range(1, 3)\n", + " for MODALITY in [\"CT\", \"RTSTRUCT\"]\n", + " for DATA_TYPE in [\"raw\", \"processed\", \"segmented\", \"labeled\"]\n", + "]\n", + "ROOT_DIRECTORY = Path(\"TRASH/writer_examples/csv_examples/patient_data\")\n", + "with CSVWriter(\n", + " root_directory=ROOT_DIRECTORY,\n", + " filename_format=\"PatientID-{PatientID}/Study-{Study}/{Modality}/{DataType}-data.csv\"\n", + ") as csv_writer:\n", + " # Test CSV writers\n", + " for patient in subject_data_examples:\n", + " path = csv_writer.save(\n", + " data = pd.DataFrame(patient, index=[0]), # just assume that this dataframe is some real data\n", + " PatientID=patient[\"PatientID\"],\n", + " Study=patient[\"Study\"],\n", + " Modality=patient[\"Modality\"],\n", + " DataType=patient[\"DataType\"]\n", + " )\n", + "\n", + "# run the tree command and capture the output\n", + "output = subprocess.check_output([\"tree\", \"-nF\", ROOT_DIRECTORY])\n", + "# print(output.decode(\"utf-8\"))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Output would look like:\n", + "\n", + "```console\n", + "TRASH/writer_examples/csv_examples/patient_data/\n", + "├── PatientID-PAT001/\n", + "│   ├── Study-Study001/\n", + "│   │   ├── CT/\n", + "│   │   │   ├── labeled-data.csv\n", + "│   │   │   ├── processed-data.csv\n", + "│   │   │   ├── raw-data.csv\n", + "│   │   │   └── segmented-data.csv\n", + "│   │   └── RTSTRUCT/\n", + "│   │   ├── labeled-data.csv\n", + "│   │   ├── processed-data.csv\n", + "│   │   ├── raw-data.csv\n", + "│   │   └── segmented-data.csv\n", + "│   └── Study-Study002/\n", + "│   ├── CT/\n", + "│   │   ├── labeled-data.csv\n", + "│   │   ├── processed-data.csv\n", + "│   │   ├── raw-data.csv\n", + "│   │   └── segmented-data.csv\n", + "│   └── RTSTRUCT/\n", + "│   ├── labeled-data.csv\n", + "│   ├── processed-data.csv\n", + "│   ├── raw-data.csv\n", + "│   └── segmented-data.csv\n", + "├── PatientID-PAT002/\n", + "│   ├── 
Study-Study001/\n", + "│   │   ├── CT/\n", + "│   │   │   ├── labeled-data.csv\n", + "│   │   │   ├── processed-data.csv\n", + "│   │   │   ├── raw-data.csv\n", + "│   │   │   └── segmented-data.csv\n", + "│   │   └── RTSTRUCT/\n", + "│   │   ├── labeled-data.csv\n", + "│   │   ├── processed-data.csv\n", + "│   │   ├── raw-data.csv\n", + "│   │   └── segmented-data.csv\n", + "│   └── Study-Study002/\n", + "│   ├── CT/\n", + "│   │   ├── labeled-data.csv\n", + "│   │   ├── processed-data.csv\n", + "│   │   ├── raw-data.csv\n", + "│   │   └── segmented-data.csv\n", + "│   └── RTSTRUCT/\n", + "│   ├── labeled-data.csv\n", + "│   ├── processed-data.csv\n", + "│   ├── raw-data.csv\n", + "│   └── segmented-data.csv\n", + "└── PatientID-PAT003/\n", + " ├── Study-Study001/\n", + " │   ├── CT/\n", + " │   │   ├── labeled-data.csv\n", + " │   │   ├── processed-data.csv\n", + " │   │   ├── raw-data.csv\n", + " │   │   └── segmented-data.csv\n", + " │   └── RTSTRUCT/\n", + " │   ├── labeled-data.csv\n", + " │   ├── processed-data.csv\n", + " │   ├── raw-data.csv\n", + " │   └── segmented-data.csv\n", + " └── Study-Study002/\n", + " ├── CT/\n", + " │   ├── labeled-data.csv\n", + " │   ├── processed-data.csv\n", + " │   ├── raw-data.csv\n", + " │   └── segmented-data.csv\n", + " └── RTSTRUCT/\n", + " ├── labeled-data.csv\n", + " ├── processed-data.csv\n", + " ├── raw-data.csv\n", + " └── segmented-data.csv\n", + "\n", + "22 directories, 48 files\n", + "\n", + "\n", + "```\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (Pixi)", + "language": "python", + "name": "pixi-kernel-python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/writer_examples.pdf 
b/notebooks/writer_examples.pdf new file mode 100644 index 0000000..b0ec9fc Binary files /dev/null and b/notebooks/writer_examples.pdf differ diff --git a/pixi.lock b/pixi.lock index 6dcae5a..8ebebdf 100644 --- a/pixi.lock +++ b/pixi.lock @@ -54,7 +54,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d3/c8/529101d7176fe7dfe1d99604e48d69c5dfdcadb4f06561f465c8ef12b4df/multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/7f/42/6e0f2c2d5c60f499aa29be14f860dd4539de322cd8fb84ee01553493fb4d/pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl @@ -125,7 +125,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/a2/12/adb6b3200c363062f805275b4c1e656be2b3681aada66c80129932ff0bae/multidict-6.1.0-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/4f/d5/1caabedd8863526a6cfa44ee7a833bd97f945dc1d56824d6d76e11731939/pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl @@ -393,7 +393,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: 
https://files.pythonhosted.org/packages/7f/42/6e0f2c2d5c60f499aa29be14f860dd4539de322cd8fb84ee01553493fb4d/pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/27/a6/98651e752a49f341aa99aa3f6c8ba361728dfc064242884355419df63669/pydicom-3.0.1-py3-none-any.whl @@ -635,7 +635,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/4f/d5/1caabedd8863526a6cfa44ee7a833bd97f945dc1d56824d6d76e11731939/pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/27/a6/98651e752a49f341aa99aa3f6c8ba361728dfc064242884355419df63669/pydicom-3.0.1-py3-none-any.whl @@ -779,7 +779,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/7f/42/6e0f2c2d5c60f499aa29be14f860dd4539de322cd8fb84ee01553493fb4d/pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/27/a6/98651e752a49f341aa99aa3f6c8ba361728dfc064242884355419df63669/pydicom-3.0.1-py3-none-any.whl @@ -906,7 +906,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl - pypi: 
https://files.pythonhosted.org/packages/4f/d5/1caabedd8863526a6cfa44ee7a833bd97f945dc1d56824d6d76e11731939/pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/27/a6/98651e752a49f341aa99aa3f6c8ba361728dfc064242884355419df63669/pydicom-3.0.1-py3-none-any.whl @@ -1071,7 +1071,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/49/3c/245c45730e088d0467621cb736bf4c07c90f5ced084ef0ff6ed178d44de7/med_imagetools-1.10.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/7f/42/6e0f2c2d5c60f499aa29be14f860dd4539de322cd8fb84ee01553493fb4d/pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/27/a6/98651e752a49f341aa99aa3f6c8ba361728dfc064242884355419df63669/pydicom-3.0.1-py3-none-any.whl @@ -1211,7 +1211,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/49/3c/245c45730e088d0467621cb736bf4c07c90f5ced084ef0ff6ed178d44de7/med_imagetools-1.10.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/4f/d5/1caabedd8863526a6cfa44ee7a833bd97f945dc1d56824d6d76e11731939/pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/27/a6/98651e752a49f341aa99aa3f6c8ba361728dfc064242884355419df63669/pydicom-3.0.1-py3-none-any.whl @@ -1300,7 +1300,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/a6/64/2dd6c4c681688c0165dea3975a6a4eab4944ea30f35000f8b8af1df3148c/multidict-6.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/4b/d7/ecf66c1cd12dc28b4040b15ab4d17b773b87fa9d29ca16125de01adb36cd/numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/41/c3/94f33af0762ed76b5a237c5797e088aa57f2b7fa8ee7932d399087be66a8/pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/fc/e1/e0a2ed6394b5772508868a977d3238f4afb2eebaf9976f0b44a8d347ad63/propcache-0.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl @@ -1383,7 +1383,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/ff/10/71f1379b05b196dae749b5ac062e87273e3f11634f447ebac12a571d90ae/multidict-6.1.0-cp310-cp310-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/20/f7/b24208eba89f9d1b58c1668bc6c8c4fd472b20c45573cb767f59d49fb0f6/numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/6a/1d/1f51e6e912d8ff316bb3935a8cda617c801783e0b998bf7a894e91d3bd4c/pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/2d/62/685d3cf268b8401ec12b250b925b21d152b9d193b7bffa5fdc4815c392c2/propcache-0.2.1-cp310-cp310-macosx_11_0_arm64.whl @@ -1482,7 +1482,7 @@ environments: - pypi: 
https://files.pythonhosted.org/packages/ba/af/73d13b918071ff9b2205fcf773d316e0f8fefb4ec65354bbcf0b10908cc6/multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/d0/edc009c27b406c4f9cbc79274d6e46d634d139075492ad055e3d68445925/numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/39/63/b3fc299528d7df1f678b0666002b37affe6b8751225c3d9c12cf530e73ed/pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/85/14/01fe53580a8e1734ebb704a3482b7829a0ef4ea68d356141cf0994d9659b/propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl @@ -1565,7 +1565,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/70/0f/6dc70ddf5d442702ed74f298d69977f904960b82368532c88e854b79f72b/multidict-6.1.0-cp311-cp311-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl - - pypi: 
https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/25/b3/2b54a1d541accebe6bd8b1358b34ceb2c509f51cb7dcda8687362490da5b/pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/3c/09/8386115ba7775ea3b9537730e8cf718d83bbf95bffe30757ccf37ec4e5da/propcache-0.2.1-cp311-cp311-macosx_11_0_arm64.whl @@ -1664,7 +1664,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d3/c8/529101d7176fe7dfe1d99604e48d69c5dfdcadb4f06561f465c8ef12b4df/multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: 
https://files.pythonhosted.org/packages/7f/42/6e0f2c2d5c60f499aa29be14f860dd4539de322cd8fb84ee01553493fb4d/pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/1c/07/ebe102777a830bca91bbb93e3479cd34c2ca5d0361b83be9dbd93104865e/propcache-0.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl @@ -1747,7 +1747,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/a2/12/adb6b3200c363062f805275b4c1e656be2b3681aada66c80129932ff0bae/multidict-6.1.0-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/4f/d5/1caabedd8863526a6cfa44ee7a833bd97f945dc1d56824d6d76e11731939/pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/4a/de/bbe712f94d088da1d237c35d735f675e494a816fd6f54e9db2f61ef4d03f/propcache-0.2.1-cp312-cp312-macosx_11_0_arm64.whl @@ -5299,10 +5299,10 @@ packages: - pkg:pypi/optype?source=hash-mapping size: 123504 timestamp: 1733329825349 -- pypi: https://files.pythonhosted.org/packages/8b/95/1f279f406cd97b62a4058188736383bf612d633874be0cca2f97b06de728/orcestra_downloader-0.10.0-py3-none-any.whl +- pypi: 
https://files.pythonhosted.org/packages/60/e6/219bca783de4d7c3b479e21d0a86b1de577154855242ca55d574eea504af/orcestra_downloader-0.11.0-py3-none-any.whl name: orcestra-downloader - version: 0.10.0 - sha256: 3d6dbc8426d34496bb39ba416498cfaef151f5d8df108551c01f6c61fbc65db4 + version: 0.11.0 + sha256: 463a1573b8c4fdf466841ac47b4a2d7dd1158e5f3055223d87c57bfa66b0046e requires_dist: - aiohttp>=3.11.4 - click>=8.1.7 @@ -7117,8 +7117,8 @@ packages: timestamp: 1728642457661 - pypi: . name: readii - version: 1.20.0 - sha256: 081b822f21841d6330d585a4788daa01ee864f58fb2d70395cfed063961a5a3c + version: 1.21.0 + sha256: 712416b55c52a31c85e7ae84be7842a9ec9df227d3b1de0018aa5d1ff0a15ad4 requires_dist: - simpleitk>=2.3.1 - matplotlib>=3.9.2,<4 diff --git a/src/readii/io/__init__.py b/src/readii/io/__init__.py new file mode 100644 index 0000000..2938c56 --- /dev/null +++ b/src/readii/io/__init__.py @@ -0,0 +1 @@ +"""Tools for reading and writing data.""" \ No newline at end of file diff --git a/src/readii/io/utils/__init__.py b/src/readii/io/utils/__init__.py new file mode 100644 index 0000000..62c805d --- /dev/null +++ b/src/readii/io/utils/__init__.py @@ -0,0 +1,8 @@ +"""Utilities for the io module.""" + +from .pattern_resolver import PatternResolver, PatternResolverError + +__all__ = [ + "PatternResolver", + "PatternResolverError", +] \ No newline at end of file diff --git a/src/readii/io/utils/pattern_resolver.py b/src/readii/io/utils/pattern_resolver.py new file mode 100644 index 0000000..15d58a2 --- /dev/null +++ b/src/readii/io/utils/pattern_resolver.py @@ -0,0 +1,120 @@ +import re +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, Tuple + +from imgtools.dicom.sort.exceptions import InvalidPatternError # type: ignore +from imgtools.dicom.sort.parser import PatternParser # type: ignore + +from readii.utils import logger + + +# Define custom exceptions +class PatternResolverError(Exception): + """Base exception for errors in pattern resolution.""" + 
+ pass + +@dataclass +class PatternResolver: + r"""Handles parsing and validating filename patterns. + + By default, this class uses the following pattern parser: + + >>> DEFAULT_PATTERN: re.Pattern = re.compile(r"%(\w+)|\{(\w+)\}") + + This will match placeholders of the form `{key}` or `%(key)s`. + + Example + ------- + Given a filename format like `"{subject_id}_{date}/{disease}.txt"`, the pattern parser + will extract the following keys: + + >>> pattern_resolver.keys + {'subject_id', 'date', 'disease'} + + And the following formatted pattern: + + >>> pattern_resolver.formatted_pattern + %(subject_id)s_%(date)s/%(disease)s.txt + + So you could resolve the pattern like this: + + >>> data_dict = {"subject_id": "JohnDoe", "date": "January-01-2025", "disease": "cancer"} + + >>> pattern_resolver.formatted_pattern % data_dict + 'JohnDoe_01-01-2025/cancer.txt' + + A more convenient way to resolve the pattern is to use the `resolve` method: + >>> pattern_resolver.resolve(data_dict)) + 'JohnDoe_01-01-2025/cancer.txt' + """ + + filename_format: str = field(init=True) + + DEFAULT_PATTERN: ClassVar[re.Pattern] = re.compile(r"%(\w+)|\{(\w+)\}") + + def __init__(self, filename_format: str) -> None: + self.filename_format = filename_format + + try: + self.pattern_parser = PatternParser( + self.filename_format, pattern_parser=self.DEFAULT_PATTERN + ) + self.formatted_pattern, self.keys = self.parse() # Validate the pattern by parsing it + except InvalidPatternError as e: + msg = f"Invalid filename format: {e}" + raise PatternResolverError(msg) from e + else: + logger.debug("All keys are valid.", keys=self.keys) + logger.debug("Formatted Pattern valid.", formatted_pattern=self.formatted_pattern) + + def parse(self) -> Tuple[str, list[str]]: + """ + Parse and validate the pattern. + + Returns + ------- + Tuple[str, List[str]] + The formatted pattern string and a list of extracted keys. 
+ + Raises + ------ + InvalidPatternError + If the pattern contains no valid placeholders or is invalid. + """ + formatted_pattern, keys = self.pattern_parser.parse() + return formatted_pattern, keys + + def resolve(self, context: Dict[str, Any]) -> str: + """Resolve the pattern using the provided context dictionary. + + Parameters + ---------- + context : Dict[str, Any] + Dictionary containing key-value pairs to substitute in the pattern. + + Returns + ------- + str + The resolved pattern string with placeholders replaced by values. + + Raises + ------ + PatternResolverError + If a required key is missing from the context dictionary. + """ + if None in context.values(): + msg = "None is not a valid value for a placeholder in the pattern." + none_keys = [key for key, value in context.items() if value is None] + msg += f" None keys: {none_keys}" + raise PatternResolverError(msg) + + try: + return self.formatted_pattern % context + except KeyError as e: + # missing_key = e.args[0] + missing_keys = set(context.keys()) - set(self.keys) + msg = f"Missing value for placeholder(s): {missing_keys}" + msg += "\nPlease provide a value for this key in the `context` argument." + msg += f" i.e `{self.__class__.__name__}.save(..., {e.args[0]}=value)`." + raise PatternResolverError(msg) from e \ No newline at end of file diff --git a/src/readii/io/writers/README.md b/src/readii/io/writers/README.md new file mode 100644 index 0000000..703b24e --- /dev/null +++ b/src/readii/io/writers/README.md @@ -0,0 +1,112 @@ +# Understanding How the Writer Works + +The Writer system is designed to provide a flexible, reusable, and customizable way to handle file +writing. Here's how it works at a **base level** and how it can be **extended** with subclasses. + +--- + +## **1. Base Level (BaseWriter)** + +The `BaseWriter` is an **abstract base class (ABC)** that defines the core logic and structure for +writing files. 
It cannot be used directly but provides a foundation that subclasses can build upon. + +### Key Components + +1. **Root Directory**: + - You must specify a `root_directory` where the files will be saved. + - If the directory doesn’t exist and `create_dirs` is `True`, it will be automatically created. + +2. **Filename Format**: + - You define a `filename_format` that specifies how the file names should be structured. + - The format can include placeholders like `{SubjectID}`, `{date}`, or any custom keys. + - Placeholders are written as `{key}` or `%key`, which allows for both Python code and CLI usage. + - These placeholders are replaced with actual values provided when calling the `save()` method (passed in as keyword arguments, i.e. `**kwargs`). + + ```python + # Example filename_format + filename_format = "Patient_{SubjectID}_{date}.txt" + ``` + +3. **Pattern Resolution**: + - The `BaseWriter` uses a `PatternResolver`, built on the pattern parser from `Med-ImageTools`, to validate and parse the `filename_format`. + - It ensures all placeholders are valid and raises a `PatternResolverError` if a value for any of them is missing when the path is resolved. + +4. **Core Methods**: + - `resolve_path(**kwargs)`: Generates the file path by replacing placeholders in the + `filename_format`. + - `save(*args, **kwargs)`: Abstract method. Subclasses implement the logic for writing files. + + ```python + # Example resolve_path usage for an ImplementedWriter subclass + writer = ImplementedWriter( + root_directory="output", + filename_format="{SubjectID}_data_{date}.txt" + ) + file_path = writer.resolve_path(SubjectID="JohnDoe", date="2024-01-01") + print(file_path) + # Output: output/JohnDoe_data_2024-01-01.txt + ``` + +5. **Context Management**: + - `BaseWriter` can be used as a context manager for setup and teardown logic. + - On exit, the root directory is removed if it is still empty and `create_dirs` is `True`. + + ```python + with writer: + writer.save(...) + ``` + +--- + +## **2.
Subclass Level** + +Subclasses of `BaseWriter` provide the actual file writing logic. Each subclass must implement the +`save()` method to define how files of a specific type are written. + +### Subclass Responsibilities + +1. **Implement the `save()` Method**: + - This method takes in data (e.g., text, images, or other file types) and writes it to disk using + the path generated by `resolve_path()`. + + ```python + class TextWriter(BaseWriter): + def save(self, content: str, **kwargs: Any) -> Path: + output_path = self.resolve_path(**kwargs) + with output_path.open("w") as file: + file.write(content) + return output_path + ``` + +2. **Handle File-Specific Logic**: + - Each subclass can validate its data or handle specific requirements, such as compression or + formatting. + + ```python + class NIFTIWriter(BaseWriter): + def save(self, image: sitk.Image, **kwargs: Any) -> Path: + output_path = self.resolve_path(**kwargs) + sitk.WriteImage(image, str(output_path), useCompression=True) + return output_path + ``` + +3. **Use Class-Level Validation**: + - Subclasses can define their own validation for file extensions, required placeholders, etc. + +--- + +## **Summary** + +1. **BaseWriter**: + - Defines the core functionality for handling directories, generating paths, and managing + resources. + - Requires `save()` to be implemented by subclasses. + +2. **PatternResolver**: + - Validates and parses filename formats. + +3. **Subclasses**: + - Provide specific file writing logic (e.g., text files, NIFTI images). + - Implement validation, compression, or other requirements. + +By extending `BaseWriter`, you can create flexible and reusable file writers for any type of data. 
diff --git a/src/readii/io/writers/__init__.py b/src/readii/io/writers/__init__.py new file mode 100644 index 0000000..9bb3553 --- /dev/null +++ b/src/readii/io/writers/__init__.py @@ -0,0 +1 @@ +"""Tools for writing data.""" diff --git a/src/readii/io/writers/base_writer.py b/src/readii/io/writers/base_writer.py new file mode 100644 index 0000000..411d54e --- /dev/null +++ b/src/readii/io/writers/base_writer.py @@ -0,0 +1,101 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from types import TracebackType +from typing import Any, Optional + +from readii.io.utils import PatternResolver +from readii.utils import logger + + +@dataclass +class BaseWriter(ABC): + """Abstract base class for managing file writing with customizable paths and filenames.""" + + # Any subclass has to be initialized with a root directory and a filename format + root_directory: Path + filename_format: str + + # optionally, you can set create_dirs to False if you want to handle the directory creation yourself + create_dirs: bool = field(default=True) + + # class-level pattern resolver instance shared across all instances + pattern_resolver: PatternResolver = field(init=False) + + def __post_init__(self) -> None: + """Initialize the writer with the given root directory and filename format.""" + self.root_directory = Path(self.root_directory) + if self.create_dirs: + self.root_directory.mkdir(parents=True, exist_ok=True) + elif not self.root_directory.exists(): + msg = f"Root directory {self.root_directory} does not exist." + raise FileNotFoundError(msg) + self.pattern_resolver = PatternResolver(self.filename_format) + + @abstractmethod + def save(self, *args: Any, **kwargs: Any) -> Path: # noqa + """Abstract method for writing data. 
Must be implemented by subclasses.""" + pass + + def _generate_datetime_strings(self) -> dict[str, str]: + now = datetime.now(timezone.utc) + return { + "date": now.strftime("%Y-%m-%d"), + "time": now.strftime("%H%M%S"), + "date_time": now.strftime("%Y-%m-%d_%H%M%S"), + } + + def resolve_path(self, **kwargs: Any) -> Path: # noqa + """Generate a file path based on the filename format, subject ID, and additional parameters.""" + context = {**self._generate_datetime_strings(), **kwargs} + filename = self.pattern_resolver.resolve(context) + out_path = self.root_directory / filename + if self.create_dirs: + out_path.parent.mkdir(parents=True, exist_ok=True) + return out_path + + # Context Manager Implementation + def __enter__(self) -> "BaseWriter": + """ + Enter the runtime context related to this writer. + + Useful if the writer needs to perform setup actions, such as + opening connections or preparing resources. + """ + logger.debug(f"Entering context manager for {self.__class__.__name__}") + return self + + def __exit__( + self: "BaseWriter", + exc_type: Optional[type], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: + """ + Exit the runtime context related to this writer. + + Parameters + ---------- + exc_type : Optional[type] + The exception type, if an exception was raised, otherwise None. + exc_value : Optional[BaseException] + The exception instance, if an exception was raised, otherwise None. + traceback : Optional[Any] + The traceback object, if an exception was raised, otherwise None. 
+ """ + if exc_type: + logger.exception( + f"Exception raised in {self.__class__.__name__} while in context manager.", + exc_info=exc_value, + ) + logger.debug(f"Exiting context manager for {self.__class__.__name__}") + + # if the root directory is empty, aka we created it but didn't write anything, delete it + if ( + self.create_dirs + and self.root_directory.exists() + and not any(self.root_directory.iterdir()) + ): + logger.debug(f"Deleting empty directory {self.root_directory}") + self.root_directory.rmdir() # remove the directory if it's empty diff --git a/src/readii/io/writers/nifti_writer.py b/src/readii/io/writers/nifti_writer.py new file mode 100644 index 0000000..52f0130 --- /dev/null +++ b/src/readii/io/writers/nifti_writer.py @@ -0,0 +1,134 @@ +from dataclasses import dataclass, field +from pathlib import Path +from typing import ClassVar + +import numpy as np +import SimpleITK as sitk + +from readii.io.writers.base_writer import BaseWriter +from readii.utils import logger + + +class NiftiWriterError(Exception): + """Base exception for NiftiWriter errors.""" + + pass + + +class NiftiWriterValidationError(NiftiWriterError): + """Raised when validation of writer configuration fails.""" + + pass + + +class NiftiWriterIOError(NiftiWriterError): + """Raised when I/O operations fail.""" + + pass + + +@dataclass +class NIFTIWriter(BaseWriter): + """Class for managing file writing with customizable paths and filenames for NIFTI files.""" + + compression_level: int = field( + default=9, + metadata={ + "help": "Compression level (0-9). Higher values mean better compression but slower writing." + }, + ) + overwrite: bool = field( + default=False, + metadata={ + "help": "If True, allows overwriting existing files. If False, raises FileExistsError." 
+ }, + ) + + # Make extensions immutable + VALID_EXTENSIONS: ClassVar[list[str]] = [ + ".nii", + ".nii.gz", + ] + MAX_COMPRESSION_LEVEL: ClassVar[int] = 9 + MIN_COMPRESSION_LEVEL: ClassVar[int] = 0 + + def __post_init__(self) -> None: + """Validate writer configuration.""" + super().__post_init__() + + if not self.MIN_COMPRESSION_LEVEL <= self.compression_level <= self.MAX_COMPRESSION_LEVEL: + msg = f"Invalid compression level {self.compression_level}. Must be between {self.MIN_COMPRESSION_LEVEL} and {self.MAX_COMPRESSION_LEVEL}." + raise NiftiWriterValidationError(msg) + + if not any(self.filename_format.endswith(ext) for ext in self.VALID_EXTENSIONS): + msg = f"Invalid filename format {self.filename_format}. Must end with one of {self.VALID_EXTENSIONS}." + raise NiftiWriterValidationError(msg) + + def save(self, image: sitk.Image | np.ndarray, PatientID: str, **kwargs: str | int) -> Path: + """Write the SimpleITK image to a NIFTI file. + + Parameters + ---------- + image : sitk.Image | np.ndarray + The SimpleITK image to save + PatientID : str + Required patient identifier + **kwargs : str | int + Additional formatting parameters for the output path + + Returns + ------- + Path + Path to the saved file + + Raises + ------ + NiftiWriterIOError + If file exists and overwrite=False or if writing fails + NiftiWriterValidationError + If image is invalid + """ + match image: + case sitk.Image(): + pass + case np.ndarray(): + image = sitk.GetImageFromArray(image) + case _: + msg = "Input must be a SimpleITK Image or a numpy array" + raise NiftiWriterValidationError(msg) + + logger.debug("Saving.", kwargs=kwargs) + + out_path = self.resolve_path(PatientID=PatientID, **kwargs) + if out_path.exists(): + if not self.overwrite: + msg = f"File {out_path} already exists. \nSet {self.__class__.__name__}.overwrite to True to overwrite." + raise NiftiWriterIOError(msg) + else: + logger.warning(f"File {out_path} already exists. 
Overwriting.") + + logger.debug("Writing image to file", out_path=out_path) + try: + sitk.WriteImage( + image, str(out_path), useCompression=True, compressionLevel=self.compression_level + ) + except Exception as e: + msg = f"Error writing image to file {out_path}: {e}" + raise NiftiWriterIOError(msg) from e + else: + logger.info("Image saved successfully.", out_path=out_path) + return out_path + + +if __name__ == "__main__": # pragma: no cover + from rich import print # noqa + + nifti_writer = NIFTIWriter( + root_directory=Path("TRASH", "nifti_writer_examples"), + filename_format="{NegativeControl}_{Region}/{SubjectID}_{Modality}.nii.gz", + compression_level=9, + overwrite=False, + create_dirs=True, + ) + + print(nifti_writer) diff --git a/tests/io/test_base_writer.py b/tests/io/test_base_writer.py new file mode 100644 index 0000000..e3bf1f8 --- /dev/null +++ b/tests/io/test_base_writer.py @@ -0,0 +1,81 @@ + +import os +import pytest +from pathlib import Path +from readii.io.writers.base_writer import BaseWriter # type: ignore + +class SimpleWriter(BaseWriter): + def save(self, content: str) -> Path: + file_path = self.resolve_path() + with open(file_path, 'w') as f: + f.write(content) + return file_path + +class MediumWriter(BaseWriter): + def save(self, content: str, suffix: str = '') -> Path: + file_path = self.resolve_path(suffix=suffix) + with open(file_path, 'w') as f: + f.write(content) + return file_path + +class ComplexWriter(BaseWriter): + def save(self, content: str, metadata: dict) -> Path: + file_path = self.resolve_path(**metadata) + with open(file_path, 'w') as f: + f.write(content) + return file_path + +@pytest.fixture +def temp_dir(tmp_path): + return tmp_path + +def test_simple_writer(temp_dir): + writer = SimpleWriter(root_directory=temp_dir, filename_format="{date_time}.txt") + with writer: + file_path = writer.save("Simple content") + assert file_path.exists() + assert file_path.read_text() == "Simple content" + +def 
test_medium_writer(temp_dir): + writer = MediumWriter(root_directory=temp_dir, filename_format="{date_time}_{suffix}.txt") + with writer: + file_path = writer.save("Medium content", suffix="test") + assert file_path.exists() + assert file_path.read_text() == "Medium content" + +def test_complex_writer(temp_dir): + writer = ComplexWriter(root_directory=temp_dir, filename_format="{date_time}_{user}.txt") + with writer: + file_path = writer.save("Complex content", metadata={"user": "testuser"}) + assert file_path.exists() + assert file_path.read_text() == "Complex content" + +def test_context_manager_cleanup(temp_dir): + subdir = temp_dir / "nested" + writer = SimpleWriter(root_directory=subdir, filename_format="{date_time}.txt") + with writer: + assert subdir.exists() + assert not subdir.exists() + +def test_directory_creation(temp_dir): + writer = SimpleWriter(root_directory=temp_dir / "nested", filename_format="{date_time}.txt") + with writer: + file_path = writer.save("Content") + assert file_path.exists() + assert file_path.read_text() == "Content" + assert (temp_dir / "nested").exists() + +def test_directory_not_created_if_exists(temp_dir): + existing_dir = temp_dir / "existing" + existing_dir.mkdir() + writer = SimpleWriter(root_directory=existing_dir, filename_format="{date_time}.txt") + with writer: + file_path = writer.save("Content") + assert file_path.exists() + assert file_path.read_text() == "Content" + assert existing_dir.exists() + +def test_no_create_dirs_non_existent(temp_dir): + with pytest.raises(FileNotFoundError): + with SimpleWriter(root_directory=temp_dir / "nested_non_existent", filename_format="{date_time}.txt", create_dirs=False) as writer: + file_path = writer.save("Content") diff --git a/tests/io/test_nifti_writer.py b/tests/io/test_nifti_writer.py new file mode 100644 index 0000000..70eae1b --- /dev/null +++ b/tests/io/test_nifti_writer.py @@ -0,0 +1,73 @@ +import pytest +import SimpleITK as sitk +import numpy as np +from pathlib import 
Path +from readii.io.writers.nifti_writer import NIFTIWriter, NiftiWriterValidationError, NiftiWriterIOError # type: ignore + +@pytest.fixture +def sample_image(): + """Fixture for creating a sample SimpleITK image.""" + image = sitk.Image(10, 10, sitk.sitkUInt8) + return image + +@pytest.fixture +def sample_array(): + """Fixture for creating a sample numpy array.""" + array = np.zeros((10, 10), dtype=np.uint8) + return array + +@pytest.fixture +def nifti_writer(tmp_path): + """Fixture for creating a NIFTIWriter instance.""" + return NIFTIWriter( + root_directory=tmp_path, + filename_format="{PatientID}.nii.gz", + compression_level=5, + overwrite=False, + create_dirs=True, + ) + +@pytest.mark.parametrize("image", ["not_an_image", 12345]) +def test_save_invalid_image(nifti_writer, image): + """Test saving an invalid image.""" + with pytest.raises(NiftiWriterValidationError): + nifti_writer.save(image=image, PatientID="12345") + +@pytest.mark.parametrize("image", ["sample_image", "sample_array"]) +def test_save_valid_image(nifti_writer, request, image): + """Test saving a valid image.""" + image = request.getfixturevalue(image) + out_path = nifti_writer.save(image=image, PatientID="12345") + assert out_path.exists() + +def test_save_existing_file_without_overwrite(nifti_writer, sample_image): + """Test saving when file already exists and overwrite is False.""" + nifti_writer.save(sample_image, PatientID="12345") + with pytest.raises(NiftiWriterIOError): + nifti_writer.save(sample_image, PatientID="12345") + +def test_save_existing_file_with_overwrite(nifti_writer, sample_image): + """Test saving when file already exists and overwrite is True.""" + nifti_writer.overwrite = True + nifti_writer.save(sample_image, PatientID="12345") + assert nifti_writer.save(sample_image, PatientID="12345").exists() + +@pytest.mark.parametrize("compression_level", [0, 5, 9]) +def test_save_with_different_compression_levels(nifti_writer, sample_image, compression_level): + """Test saving 
with different compression levels.""" + nifti_writer.compression_level = compression_level + out_path = nifti_writer.save(sample_image, PatientID="12345") + assert out_path.exists() + +@pytest.mark.parametrize("filename_format", ["{PatientID}.nii", "{PatientID}.nii.gz"]) +def test_save_with_different_filename_formats(nifti_writer, sample_image, filename_format): + """Test saving with different filename formats.""" + nifti_writer.filename_format = filename_format + out_path = nifti_writer.save(sample_image, PatientID="12345") + assert out_path.exists() + +@pytest.mark.parametrize("key,value", [("Modality", "T1"), ("Region", "Brain")]) +def test_save_with_additional_keys(nifti_writer, sample_image, key, value): + """Test saving with additional keys.""" + out_path = nifti_writer.save(sample_image, PatientID="12345", **{key: value}) + assert out_path.exists() diff --git a/tests/io/test_pattern_resolver.py b/tests/io/test_pattern_resolver.py new file mode 100644 index 0000000..f77da6d --- /dev/null +++ b/tests/io/test_pattern_resolver.py @@ -0,0 +1,23 @@ +import pytest +from readii.io.utils import PatternResolver, PatternResolverError # type: ignore + +@pytest.mark.parametrize("pattern, context, expected", [ + ("{subject_id}_{date}/{disease}.txt", {"subject_id": "JohnDoe", "date": "2025-01-01", "disease": "cancer"}, "JohnDoe_2025-01-01/cancer.txt"), + ("{subject_id}_{date}/{disease}.txt", {"subject_id": "JohnDoe", "date": "2025-01-01"}, PatternResolverError), + ("{subject_id}_{date}/{disease.txt", {}, PatternResolverError), + # New complex test cases + ("{subject_id}_{date}/{disease}/{sample_id}.txt", {"subject_id": "JaneDoe", "date": "2025-01-01", "disease": "flu", "sample_id": "S123"}, "JaneDoe_2025-01-01/flu/S123.txt"), + ("{subject_id}_{date}/{disease}/{sample_id}.txt", {"subject_id": "JaneDoe", "date": "2025-01-01", "disease": "flu"}, PatternResolverError), + ("{subject_id}_{date}/{disease}/{sample_id}.txt", {"subject_id": "JaneDoe", "date": "2025-01-01", 
"disease": "flu", "sample_id": ""}, "JaneDoe_2025-01-01/flu/.txt"), + ("{subject_id}_{date}/{disease}/{sample_id}.txt", {"subject_id": "JaneDoe", "date": "2025-01-01", "disease": "flu", "sample_id": None}, PatternResolverError), + ("{subject_id}_{date}/{disease}/{sample_id}.txt", {"subject_id": "JaneDoe", "date": "2025-01-01", "disease": "flu", "sample_id": "S123", "extra_key": "extra_value"}, "JaneDoe_2025-01-01/flu/S123.txt"), +]) +def test_resolve(pattern, context, expected): + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + resolver = PatternResolver(pattern) + resolver.resolve(context) + else: + resolver = PatternResolver(pattern) + result = resolver.resolve(context) + assert result == expected