From 14e71a703e47ff71ecb56cc02bcdc7a61f9d399f Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Fri, 12 Apr 2024 17:04:06 -0400 Subject: [PATCH 01/13] rewrite dicom archive --- install/requirements/requirements.txt | 1 + pyproject.toml | 3 + python/dicom_archive.py | 376 ++++++++++++++++++++++++++ python/dicom_summary.py | 50 ++++ python/lib/db/queries/session.py | 1 - python/lib/dicom/dicom_database.py | 134 +++++++++ python/lib/dicom/dicom_log.py | 73 +++++ python/lib/dicom/summary_make.py | 216 +++++++++++++++ python/lib/dicom/summary_type.py | 101 +++++++ python/lib/dicom/summary_write.py | 130 +++++++++ python/lib/dicom/text.py | 79 ++++++ python/lib/dicom/text_dict.py | 43 +++ python/lib/dicom/text_table.py | 52 ++++ python/lib/utilities.py | 19 ++ 14 files changed, 1277 insertions(+), 1 deletion(-) create mode 100644 python/dicom_archive.py create mode 100644 python/dicom_summary.py create mode 100644 python/lib/dicom/dicom_database.py create mode 100644 python/lib/dicom/dicom_log.py create mode 100644 python/lib/dicom/summary_make.py create mode 100644 python/lib/dicom/summary_type.py create mode 100644 python/lib/dicom/summary_write.py create mode 100644 python/lib/dicom/text.py create mode 100644 python/lib/dicom/text_dict.py create mode 100644 python/lib/dicom/text_table.py diff --git a/install/requirements/requirements.txt b/install/requirements/requirements.txt index 5f84aaaaa..96a615e7f 100644 --- a/install/requirements/requirements.txt +++ b/install/requirements/requirements.txt @@ -11,6 +11,7 @@ nose numpy protobuf>=3.0.0 pybids==0.17.0 +pydicom pyright pytest python-dateutil diff --git a/pyproject.toml b/pyproject.toml index eb0285357..ca87598a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,8 +17,11 @@ select = ["E", "F", "I", "N", "UP", "W"] # `test` directory. 
[tool.pyright] include = [ + "python/dicom_archive.py", + "python/dicom_summary.py", "python/tests", "python/lib/db", + "python/lib/dicom", "python/lib/exception", "python/lib/util", "python/lib/config_file.py", diff --git a/python/dicom_archive.py b/python/dicom_archive.py new file mode 100644 index 000000000..c6cabe00f --- /dev/null +++ b/python/dicom_archive.py @@ -0,0 +1,376 @@ +#!/usr/bin/env python + +from typing import Any, cast +import gzip +import os +import shutil +import sys +import tarfile + +from lib.db.connect import connect_to_db +import lib.dicom.dicom_database +import lib.dicom.dicom_log +import lib.dicom.summary_make +import lib.dicom.summary_write +import lib.dicom.text +import lib.exitcode +from lib.lorisgetopt import LorisGetOpt +import lib.database +from lib.db.model.dicom_archive import DbDicomArchive +from lib.db.query.dicom_archive import try_get_dicom_archive_with_study_uid +from lib.db.query.mri_upload import try_get_mri_upload_with_id +from lib.db.query.session import try_get_session_with_cand_id_visit_label + + +def print_error_exit(message: str, code: int): + print(f'ERROR: {message}', file=sys.stderr) + sys.exit(code) + + +def print_warning(message: str): + print(f'WARNING: {message}', file=sys.stderr) + + +class Args: + profile: str + source: str + target: str + today: bool + year: bool + overwrite: bool + insert: bool + update: bool + upload: int | None + session: bool + verbose: bool + + def __init__(self, options_dict: dict[str, Any]): + self.profile = options_dict['profile']['value'] + self.source = options_dict['source']['value'] + self.target = options_dict['target']['value'] + self.today = options_dict['today']['value'] + self.year = options_dict['year']['value'] + self.overwrite = options_dict['overwrite']['value'] + self.insert = options_dict['insert']['value'] + self.update = options_dict['update']['value'] + self.upload = options_dict['upload']['value'] + self.session = options_dict['session']['value'] + self.verbose = 
options_dict['verbose']['value'] + + +def check_create_file(args: Args, path: str): + if os.path.exists(path): + if args.overwrite: + print_warning(f'Overwriting \'{path}\'') + else: + print_error_exit( + ( + f'File or directory \'{path}\' already exists. ' + 'Use option \'--overwrite\' to overwrite it.' + ), + lib.exitcode.TARGET_EXISTS_NO_CLOBBER, + ) + + +def main(): + usage = ( + "\n" + + "********************************************************************\n" + " DICOM ARCHIVING SCRIPT\n" + "********************************************************************\n" + "The program reads a DICOM directory, processes it into a structured and " + "compressed archive, and insert it or upload it to the LORIS database." + + "usage : dicom_archive.py -p -s -t ...\n\n" + + "options: \n" + "\t-p, --profile : Name of the python database config file in dicom-archive/.loris_mri\n" + "\t-s, --source : Source directory containing the DICOM files to archive\n" + "\t-t, --target : Directory in which to place the resulting DICOM archive\n" + "\t --today : Use today's date as the scan date instead of the DICOM scan date\n" + "\t --year : Create the archive in a year subdirectory (example: 2024/DCM_2024-08-27_FooBar.tar)s\n" + "\t --overwrite : Overwrite the DICOM archive file if it already exists\n" + "\t --insert : Insert the created DICOM archive in the database (requires the archive\n" + "\t to not be already inserted)\n" + "\t --update : Update the DICOM archive in the database (requires the archive to be\n" + "\t already be inserted), generally used with --overwrite" + "\t --upload : Associate the DICOM archive with an existing MRI upload in the database, which is\n" + " updated accordingly" + "\t --session : Determine the session for the DICOM archive using the LORIS configuration, and associate\n" + " them accordingly" + "\t-v, --verbose : If set, be verbose\n\n" + + "required options are: \n" + "\t--profile\n" + "\t--source\n" + "\t--target\n\n" + ) + + # NOTE: Some options 
do not have short options but LorisGetOpt does not support that, so we + # repeat the long names. + options_dict = { + "profile": { + "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False + }, + "source": { + "value": None, "required": True, "expect_arg": True, "short_opt": "s", "is_path": True, + }, + "target": { + "value": None, "required": True, "expect_arg": True, "short_opt": "t", "is_path": True, + }, + "today": { + "value": False, "required": False, "expect_arg": False, "short_opt": "today", "is_path": False, + }, + "year": { + "value": False, "required": False, "expect_arg": False, "short_opt": "year", "is_path": False, + }, + "overwrite": { + "value": False, "required": False, "expect_arg": False, "short_opt": "overwrite", "is_path": False, + }, + "insert": { + "value": False, "required": False, "expect_arg": False, "short_opt": "insert", "is_path": False, + }, + "update": { + "value": False, "required": False, "expect_arg": False, "short_opt": "update", "is_path": False, + }, + "upload": { + "value": True, "required": False, "expect_arg": False, "short_opt": "upload", "is_path": False, + }, + "session": { + "value": False, "required": False, "expect_arg": False, "short_opt": "session", "is_path": False, + }, + "verbose": { + "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False + }, + "help": { + "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False + }, + } + + # Get the CLI arguments and connect to the database + + loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + args = Args(loris_getopt_obj.options_dict) + + config = cast(Any, loris_getopt_obj.config_info).mysql + db = connect_to_db(config.mysql) + old_db = lib.database.Database(config.mysql, args.verbose) + get_subject_ids = None + try: + get_subject_ids = config.get_subject_ids + except AttributeError: + print_error_exit( + 'Config file does not contain a 
`get_subject_ids` function.', + lib.exitcode.BAD_CONFIG_SETTING, + ) + + # Check arguments + + if args.insert and args.update: + print_error_exit( + 'Arguments \'--insert\' and \'--update\' must not be set both at the same time.', + lib.exitcode.INVALID_ARG, + ) + + if not os.path.isdir(args.source) or not os.access(args.source, os.R_OK): + print_error_exit( + 'Argument \'--source\' must be a readable directory path.', + lib.exitcode.INVALID_ARG, + ) + + if not os.path.isdir(args.target) or not os.access(args.target, os.W_OK): + print_error_exit( + 'Argument \'--target\' must be a writable directory path.', + lib.exitcode.INVALID_ARG, + ) + + if (args.session or args.upload is not None) and not (args.insert or args.update): + print_error_exit( + 'Arguments \'--db-insert\' or \'--db-update\' must be set when \'--db-session\' or \'--db-upload\' is set.', + lib.exitcode.INVALID_ARG, + ) + + # Check paths + + base_name = os.path.basename(args.source) + + tar_path = f'{args.target}/{base_name}.tar' + zip_path = f'{args.target}/{base_name}.tar.gz' + summary_path = f'{args.target}/{base_name}.meta' + log_path = f'{args.target}/{base_name}.log' + + check_create_file(args, tar_path) + check_create_file(args, zip_path) + check_create_file(args, summary_path) + check_create_file(args, log_path) + + # Check MRI upload + + mri_upload = None + if args.upload is not None: + mri_upload = try_get_mri_upload_with_id(db, args.upload) + if mri_upload is None: + print_error_exit( + f'No MRI upload found in the database with id {args.upload}.', + lib.exitcode.UPDATE_FAILURE, + ) + + print('Extracting DICOM information (may take a long time)') + + summary = lib.dicom.summary_make.make(args.source, args.verbose) + + print('Checking database presence') + + dicom_archive = try_get_dicom_archive_with_study_uid(db, summary.info.study_uid) + + if args.insert and dicom_archive is not None: + print_error_exit( + ( + f'Study \'{summary.info.study_uid}\' is already inserted in the database\n' + 
'Previous archiving log:\n' + f'{dicom_archive.create_info}' + ), + lib.exitcode.INSERT_FAILURE, + ) + + if args.update and dicom_archive is None: + print_error_exit( + f'No study \'{summary.info.study_uid}\' found in the database', + lib.exitcode.UPDATE_FAILURE, + ) + + session = None + if args.session: + get_subject_ids = cast(Any, get_subject_ids) + + print('Determine session from configuration') + + ids = get_subject_ids(old_db, summary.info.patient.name) + cand_id = ids['CandID'] + visit_label = ids['visitLabel'] + session = try_get_session_with_cand_id_visit_label(db, cand_id, visit_label) + + if session is None: + print_error_exit( + ( + f'No session found in the database for patient name \'{summary.info.patient.name}\' ' + f'and visit label \'{visit_label}\'.' + ), + lib.exitcode.GET_SESSION_ID_FAILURE, + ) + + print('Copying into DICOM tar') + + with tarfile.open(tar_path, 'w') as tar: + for file in os.listdir(args.source): + tar.add(args.source + '/' + file) + + print('Calculating DICOM tar MD5 sum') + + tarball_md5_sum = lib.dicom.text.make_hash(tar_path, True) + + print('Zipping DICOM tar (may take a long time)') + + with open(tar_path, 'rb') as tar: + # 6 is the default compression level of the tar command, Python's + # default is 9, which is more powerful but also too slow. + with gzip.open(zip_path, 'wb', compresslevel=6) as zip: + shutil.copyfileobj(tar, zip) + + print('Calculating DICOM zip MD5 sum') + + zipball_md5_sum = lib.dicom.text.make_hash(zip_path, True) + + print('Getting DICOM scan date') + + if not args.today and summary.info.scan_date is None: + print_warning(( + 'No scan date was found in the DICOMs, ' + 'consider using argument \'--today\' to use today\'s date as the scan date.' + )) + + if args.year and summary.info.scan_date is None: + print_warning(( + 'Argument \'--year\' was provided but no scan date was found in the DICOMs, ' + 'the argument will be ignored.' 
+ )) + + if args.year and summary.info.scan_date is not None: + dir_path = f'{args.target}/{summary.info.scan_date.year}' + if not os.path.exists(dir_path): + print(f'Creating directory \'{dir_path}\'') + os.mkdir(dir_path) + elif not os.path.isdir(dir_path) or not os.access(dir_path, os.W_OK): + print_error_exit( + f'Path \'{dir_path}\' exists but is not a writable directory.', + lib.exitcode.CREATE_DIR_FAILURE, + ) + else: + dir_path = args.target + + if summary.info.scan_date is not None: + scan_date_string = lib.dicom.text.write_date(summary.info.scan_date) + archive_path = f'{dir_path}/DCM_{scan_date_string}_{base_name}.tar' + else: + archive_path = f'{dir_path}/DCM_{base_name}.tar' + + check_create_file(args, archive_path) + + log = lib.dicom.dicom_log.make(args.source, archive_path, tarball_md5_sum, zipball_md5_sum) + + if args.verbose: + print('The archive will be created with the following arguments:') + print(lib.dicom.dicom_log.write_to_string(log)) + + print('Writing summary file') + + lib.dicom.summary_write.write_to_file(summary_path, summary) + + print('Writing log file') + + lib.dicom.dicom_log.write_to_file(log_path, log) + + print('Copying into DICOM archive') + + with tarfile.open(archive_path, 'w') as tar: + tar.add(zip_path, os.path.basename(zip_path)) + tar.add(summary_path, os.path.basename(summary_path)) + tar.add(log_path, os.path.basename(log_path)) + + print('Removing temporary files') + + os.remove(tar_path) + os.remove(zip_path) + os.remove(summary_path) + os.remove(log_path) + + print('Calculating DICOM tar MD5 sum') + + log.archive_md5_sum = lib.dicom.text.make_hash(log.target_path, True) + + if args.insert: + lib.dicom.dicom_database.insert(db, log, summary) + + if args.update: + # Safe because we checked previously that the DICOM archive is not `None` + dicom_archive = cast(DbDicomArchive, dicom_archive) + lib.dicom.dicom_database.update(db, dicom_archive, log, summary) + + if mri_upload is not None: + print('Updating MRI upload in 
the database') + dicom_archive = cast(DbDicomArchive, dicom_archive) + dicom_archive.upload = mri_upload + + if session is not None: + dicom_archive = cast(DbDicomArchive, dicom_archive) + dicom_archive.session = session + + if mri_upload is not None: + mri_upload.session = session + + print('Success') + + +if __name__ == "__main__": + main() diff --git a/python/dicom_summary.py b/python/dicom_summary.py new file mode 100644 index 000000000..b1abede34 --- /dev/null +++ b/python/dicom_summary.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +import argparse +from dataclasses import dataclass +import sys +import traceback + +import lib.dicom.summary_make +import lib.dicom.summary_write +import lib.exitcode + +parser = argparse.ArgumentParser(description=( + 'Read a DICOM directory and print the DICOM summary of this directory ' + 'in the the console.' + )) + +parser.add_argument( + 'directory', + help='The DICOM directory') + +parser.add_argument( + '--verbose', + action='store_true', + help='Set the script to be verbose') + + +@dataclass +class Args: + directory: str + verbose: bool + + +def main(): + parsed_args = parser.parse_args() + args = Args(parsed_args.directory, parsed_args.verbose) + + try: + summary = lib.dicom.summary_make.make(args.directory, args.verbose) + except Exception as e: + print(f'ERROR: Cannot create a summary for the directory \'{args.directory}\'.', file=sys.stderr) + print('Exception message:', file=sys.stderr) + print(e, file=sys.stderr) + traceback.print_exc(file=sys.stderr) + exit(lib.exitcode.INVALID_DICOM) + + print(lib.dicom.summary_write.write_to_string(summary)) + + +if __name__ == "__main__": + main() diff --git a/python/lib/db/queries/session.py b/python/lib/db/queries/session.py index 2e2b8b4b2..a32d3cbd7 100644 --- a/python/lib/db/queries/session.py +++ b/python/lib/db/queries/session.py @@ -1,4 +1,3 @@ - from sqlalchemy import select from sqlalchemy.orm import Session as Database diff --git a/python/lib/dicom/dicom_database.py 
b/python/lib/dicom/dicom_database.py new file mode 100644 index 000000000..8367962ec --- /dev/null +++ b/python/lib/dicom/dicom_database.py @@ -0,0 +1,134 @@ +from datetime import datetime +from sqlalchemy.orm import Session as Database +from lib.db.model.dicom_archive import DbDicomArchive +from lib.db.model.dicom_archive_file import DbDicomArchiveFile +from lib.db.model.dicom_archive_series import DbDicomArchiveSeries +from lib.db.query.dicom_archive import delete_dicom_archive_file_series, get_dicom_archive_series_with_file_info +from lib.dicom.summary_type import Summary +from lib.dicom.dicom_log import DicomArchiveLog +import lib.dicom.text +import lib.dicom.summary_write +import lib.dicom.dicom_log + + +def populate_dicom_archive( + dicom_archive: DbDicomArchive, + log: DicomArchiveLog, + summary: Summary, + archive_path: str, + session_id: int | None, +): + """ + Populate a DICOM archive with information from its DICOM archiving log and DICOM summary. + + :param dicom_archive: The DICOM archive ORM object to populate. + :param log: The DICOM arching log object. + :param summary: The DICOM summary object. + :param session_id: The optional session ID associated with the DICOM archive. 
+ """ + + dicom_archive.study_uid = summary.info.study_uid + dicom_archive.patient_id = summary.info.patient.id + dicom_archive.patient_name = summary.info.patient.name + dicom_archive.patient_birthdate = summary.info.patient.birth_date + dicom_archive.patient_sex = summary.info.patient.sex + dicom_archive.neuro_db_center_name = None + dicom_archive.center_name = summary.info.institution or '' + dicom_archive.last_update = None + dicom_archive.date_acquired = summary.info.scan_date + dicom_archive.date_last_archived = datetime.now() + dicom_archive.acquisition_count = len(summary.acquis) + dicom_archive.dicom_file_count = len(summary.dicom_files) + dicom_archive.non_dicom_file_count = len(summary.other_files) + dicom_archive.md5_sum_dicom_only = log.tarball_md5_sum + dicom_archive.md5_sum_archive = log.archive_md5_sum + dicom_archive.creating_user = log.creator_name + dicom_archive.sum_type_version = log.summary_version + dicom_archive.tar_type_version = log.archive_version + dicom_archive.source_location = log.source_path + dicom_archive.archive_location = archive_path + dicom_archive.scanner_manufacturer = summary.info.scanner.manufacturer + dicom_archive.scanner_model = summary.info.scanner.model + dicom_archive.scanner_serial_number = summary.info.scanner.serial_number + dicom_archive.scanner_software_version = summary.info.scanner.software_version + dicom_archive.session_id = session_id + dicom_archive.upload_attempt = 0 + dicom_archive.create_info = lib.dicom.dicom_log.write_to_string(log) + dicom_archive.acquisition_metadata = lib.dicom.summary_write.write_to_string(summary) + dicom_archive.date_sent = None + dicom_archive.pending_transfer = 0 + + +def insert(db: Database, log: DicomArchiveLog, summary: Summary): + """ + Insert a DICOM archive into the database. + + :param db: The database. + :param log: The archiving log of the DICOM archive. + :param summary: The summary of the DICOM archive. 
+ """ + + dicom_archive = DbDicomArchive() + populate_dicom_archive(dicom_archive, log, summary, 'TODO', None) + dicom_archive.date_first_archived = datetime.now() + db.add(dicom_archive) + insert_files_series(db, dicom_archive, summary) + return dicom_archive + + +def insert_files_series(db: Database, dicom_archive: DbDicomArchive, summary: Summary): + for acqui in summary.acquis: + db.add(DbDicomArchiveSeries( + archive_id = dicom_archive.id, + series_number = acqui.series_number, + series_description = acqui.series_description, + sequence_name = acqui.sequence_name, + echo_time = acqui.echo_time, + repetition_time = acqui.repetition_time, + inversion_time = acqui.inversion_time, + slice_thickness = acqui.slice_thickness, + phase_encoding = acqui.phase_encoding, + number_of_files = acqui.number_of_files, + series_uid = acqui.series_uid, + modality = acqui.modality, + )) + + for file in summary.dicom_files: + series = get_dicom_archive_series_with_file_info( + db, + file.series_uid or '', + file.series_number or 1, + file.echo_time, + file.sequence_name or '', + ) + + db.add(DbDicomArchiveFile( + archive_id = dicom_archive.id, + series_number = file.series_number, + file_number = file.file_number, + echo_number = file.echo_number, + series_description = file.series_description, + md5_sum = file.md5_sum, + file_name = file.file_name, + series_id = series.id, + )) + + +def update(db: Database, dicom_archive: DbDicomArchive, log: DicomArchiveLog, summary: Summary): + """ + Insert a DICOM archive into the database. + + :param db: The database. + :param archive: The DICOM archive to update. + :param log: The archiving log of the DICOM archive. + :param summary: The summary of the DICOM archive. + """ + + # Delete the associated database DICOM files and series. + delete_dicom_archive_file_series(db, dicom_archive) + + # Update the database record with the new DICOM information. 
+ populate_dicom_archive(dicom_archive, log, summary, 'TODO', None) + + # Insert the new DICOM files and series. + insert_files_series(db, dicom_archive, summary) diff --git a/python/lib/dicom/dicom_log.py b/python/lib/dicom/dicom_log.py new file mode 100644 index 000000000..37fc18503 --- /dev/null +++ b/python/lib/dicom/dicom_log.py @@ -0,0 +1,73 @@ +from dataclasses import dataclass +from datetime import datetime +import os +import socket +from lib.dicom.text_dict import DictWriter + + +@dataclass +class DicomArchiveLog: + """ + DICOM archiving log object, containg information about the archiving of a + DICOM directory. + """ + + source_path: str + target_path: str + creator_host: str + creator_os: str + creator_name: str + archive_date: str + summary_version: int + archive_version: int + tarball_md5_sum: str + zipball_md5_sum: str + archive_md5_sum: str + + +def write_to_string(log: DicomArchiveLog): + """ + Serialize a DICOM archiving log object into a string. + """ + return DictWriter([ + ('Taken from dir' , log.source_path), + ('Archive target location' , log.target_path), + ('Name of creating host' , log.creator_host), + ('Name of host OS' , log.creator_os), + ('Created by user' , log.creator_name), + ('Archived on' , log.archive_date), + ('dicomSummary version' , log.summary_version), + ('dicomTar version' , log.archive_version), + ('md5sum for DICOM tarball' , log.tarball_md5_sum), + ('md5sum for DICOM tarball gzipped' , log.zipball_md5_sum), + ('md5sum for complete archive' , log.archive_md5_sum), + ]).write() + + +def write_to_file(file_path: str, log: DicomArchiveLog): + """ + Serialize a DICOM archiving log object into a text file. + """ + string = write_to_string(log) + with open(file_path, 'w') as file: + file.write(string) + + +def make(source: str, target: str, tarball_md5_sum: str, zipball_md5_sum: str): + """ + Create a DICOM archiving log object from the provided arguments on a DICOM + directory, as well as the current execution environment. 
+ """ + return DicomArchiveLog( + source, + target, + socket.gethostname(), + os.uname().sysname, + os.environ['USER'], + datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'), + 2, + 2, + tarball_md5_sum, + zipball_md5_sum, + 'Provided in database only', + ) diff --git a/python/lib/dicom/summary_make.py b/python/lib/dicom/summary_make.py new file mode 100644 index 000000000..60f289eaf --- /dev/null +++ b/python/lib/dicom/summary_make.py @@ -0,0 +1,216 @@ +from functools import cmp_to_key +import os +import pydicom +import pydicom.errors +from lib.dicom.summary_type import Summary, Info, Patient, Scanner, Acquisition, DicomFile, OtherFile +from lib.dicom.text import make_hash, read_dicom_date_none +from lib.utilities import get_all_files + + +def get_value(dicom: pydicom.Dataset, tag: str): + """ + Get a required value from a DICOM. + """ + + if tag not in dicom: + raise Exception(f'Expected DICOM tag \'{tag}\' but found none.') + + return dicom[tag].value + + +def get_value_none(dicom: pydicom.Dataset, tag: str): + """ + Get a nullable value from a DICOM. + """ + + if tag not in dicom: + return None + + return dicom[tag].value or None + + +def cmp_int_none(a: int | None, b: int | None): + """ + Order comparison between two nullable integers. + The returned value is in accordance with `functools.cmp_to_key`. + https://docs.python.org/3/library/functools.html#functools.cmp_to_key + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b: + return a - b + + +def cmp_string_none(a: str | None, b: str | None): + """ + Order comparison between two nullable strings. + The returned value is in accordance with `functools.cmp_to_key`. 
+ https://docs.python.org/3/library/functools.html#functools.cmp_to_key + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b if a < b: + return -1 + case a, b if a > b: + return 1 + case a, b: + return 0 + + +def cmp_files(a: DicomFile, b: DicomFile): + """ + Compare the order of two files to sort them in the summary. + """ + + return \ + cmp_int_none(a.series_number, b.series_number) or \ + cmp_int_none(a.file_number, b.file_number) or \ + cmp_int_none(a.echo_number, b.echo_number) + + +def cmp_acquis(a: Acquisition, b: Acquisition): + """ + Compare the order of two acquisitions to sort them in the summary. + """ + + return \ + a.series_number - b.series_number or \ + cmp_string_none(a.sequence_name, b.sequence_name) + + +def make(dir_path: str, verbose: bool): + """ + Create a DICOM summary object from a DICOM directory path. + """ + + info = None + dicom_files: list[DicomFile] = [] + other_files: list[OtherFile] = [] + acquis_dict: dict[tuple[int, int | None, str | None], Acquisition] = dict() + + file_paths = get_all_files(dir_path) + for i, file_path in enumerate(file_paths): + if verbose: + print(f'Processing file \'{file_path}\' ({i + 1}/{len(file_paths)})') + + try: + dicom = pydicom.dcmread(dir_path + '/' + file_path) # type: ignore + if info is None: + info = make_info(dicom) + + dicom_files.append(make_dicom_file(dicom)) + + series = dicom.SeriesNumber + echo = get_value_none(dicom, 'EchoNumbers') + sequence = get_value_none(dicom, 'SequenceName') + + if (series, sequence, echo) not in acquis_dict: + acquis_dict[(series, sequence, echo)] = make_acqui(dicom) + + acquis_dict[(series, sequence, echo)].number_of_files += 1 + except pydicom.errors.InvalidDicomError: + other_files.append(make_other_file(dir_path + '/' + file_path)) + + if info is None: + raise Exception('Found no DICOM file in the directory.') + + acquis = list(acquis_dict.values()) + + dicom_files = sorted(dicom_files, 
key=cmp_to_key(cmp_files)) + acquis = sorted(acquis, key=cmp_to_key(cmp_acquis)) + + return Summary(info, acquis, dicom_files, other_files) + + +def make_info(dicom: pydicom.Dataset): + """ + Create an `Info` object from a DICOM file, containing general information + about the DICOM directory. + """ + + birth_date = read_dicom_date_none(get_value_none(dicom, 'PatientBirthDate')) + scan_date = read_dicom_date_none(get_value_none(dicom, 'StudyDate')) + + patient = Patient( + get_value(dicom, 'PatientID'), + get_value(dicom, 'PatientName'), + get_value_none(dicom, 'PatientSex'), + birth_date, + ) + + scanner = Scanner( + get_value(dicom, 'Manufacturer'), + get_value(dicom, 'ManufacturerModelName'), + get_value(dicom, 'DeviceSerialNumber'), + get_value(dicom, 'SoftwareVersions'), + ) + + return Info( + get_value(dicom, 'StudyInstanceUID'), + patient, + scanner, + scan_date, + get_value_none(dicom, 'InstitutionName'), + get_value(dicom, 'Modality'), + ) + + +def make_dicom_file(dicom: pydicom.Dataset): + """ + Create a `DicomFile` object from a DICOM file, containing information about + this DICOM file. + """ + return DicomFile( + os.path.basename(dicom.filename), + make_hash(dicom.filename), + get_value_none(dicom, 'SeriesNumber'), + get_value_none(dicom, 'SeriesInstanceUID'), + get_value_none(dicom, 'SeriesDescription'), + get_value_none(dicom, 'InstanceNumber'), + get_value_none(dicom, 'EchoNumbers'), + get_value_none(dicom, 'EchoTime'), + get_value_none(dicom, 'SequenceName'), + ) + + +def make_other_file(file_path: str): + """ + Create an `OtherFile` object from a non-DICOM file, containing information + about this file. + """ + return OtherFile( + os.path.basename(file_path), + make_hash(file_path), + ) + + +def make_acqui(dicom: pydicom.Dataset): + """ + Create an `Acquisition` object from a DICOM file, containg information + about a DICOM series. 
+ """ + return Acquisition( + get_value(dicom, 'SeriesNumber'), + get_value_none(dicom, 'SeriesInstanceUID'), + get_value_none(dicom, 'SeriesDescription'), + get_value_none(dicom, 'SequenceName'), + get_value_none(dicom, 'EchoTime'), + get_value_none(dicom, 'RepetitionTime'), + get_value_none(dicom, 'InversionTime'), + get_value_none(dicom, 'SliceThickness'), + get_value_none(dicom, 'InPlanePhaseEncodingDirection'), + 0, + get_value_none(dicom, 'Modality'), + ) diff --git a/python/lib/dicom/summary_type.py b/python/lib/dicom/summary_type.py new file mode 100644 index 000000000..6bf724b8e --- /dev/null +++ b/python/lib/dicom/summary_type.py @@ -0,0 +1,101 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class Patient: + """ + DICOM patient object, which contains information about a DICOM patient. + """ + + id: str + name: str + sex: str | None + birth_date: date | None + + +@dataclass +class Scanner: + """ + DICOM scanner object, which contains information about a DICOM scanner. + """ + + manufacturer: str + model: str + serial_number: str + software_version: str + + +@dataclass +class Info: + """ + General DICOM information object, which contains general information about + a DICOM directory. + """ + + study_uid: str + patient: Patient + scanner: Scanner + scan_date: date | None + institution: str | None + modality: str + + +@dataclass +class DicomFile: + """ + DICOM file object, which contains information about a DICOM file inside a + DICOM directory. + """ + + file_name: str + md5_sum: str + series_number: int | None + series_uid: str | None + series_description: str | None + file_number: int | None + echo_number: int | None + echo_time: float | None + sequence_name: str | None + + +@dataclass +class OtherFile: + """ + Non-DICOM file object, which contains information about a non-DICOM file + inside a DICOM directory. 
+ """ + + file_name: str + md5_sum: str + + +@dataclass +class Acquisition: + """ + DICOM acquisition object, which contains information about a DICOM series. + """ + + series_number: int + series_uid: str | None + series_description: str | None + sequence_name: str | None + echo_time: float | None # In Milliseconds + repetition_time: float | None # In Milliseconds + inversion_time: float | None # In Milliseconds + slice_thickness: float | None # In Millimeters + phase_encoding: str | None + number_of_files: int + modality: str | None + + +@dataclass +class Summary: + """ + DICOM summary object, which contains information about a DICOM directory. + """ + + info: Info + acquis: list[Acquisition] + dicom_files: list[DicomFile] + other_files: list[OtherFile] diff --git a/python/lib/dicom/summary_write.py b/python/lib/dicom/summary_write.py new file mode 100644 index 000000000..d6ba6935b --- /dev/null +++ b/python/lib/dicom/summary_write.py @@ -0,0 +1,130 @@ +import xml.etree.ElementTree as ET +from lib.dicom.summary_type import Summary, Info, Acquisition, DicomFile, OtherFile +from lib.dicom.text_dict import DictWriter +from lib.dicom.text_table import TableWriter +from lib.dicom.text import write_date_none + + +def write_to_file(filename: str, summary: Summary): + """ + Serialize a DICOM summary object into a text file. + """ + string = write_to_string(summary) + with open(filename, 'w') as file: + file.write(string) + + +def write_to_string(summary: Summary) -> str: + """ + Serialize a DICOM summary object into a string. 
+ """ + return ET.tostring(write_xml(summary), encoding='unicode') + '\n' + + +def write_xml(summary: Summary): + study = ET.Element('STUDY') + ET.SubElement(study, 'STUDY_INFO').text = write_info(summary.info) + ET.SubElement(study, 'FILES').text = write_dicom_files_table(summary.dicom_files) + ET.SubElement(study, 'OTHERS').text = write_other_files_table(summary.other_files) + ET.SubElement(study, 'ACQUISITIONS').text = write_acquis_table(summary.acquis) + ET.SubElement(study, 'SUMMARY').text = write_ending(summary) + ET.indent(study, space='') + return study + + +def write_info(info: Info): + return '\n' + DictWriter([ + ('Unique Study ID' , info.study_uid), + ('Patient Name' , info.patient.name), + ('Patient ID' , info.patient.id), + ('Patient date of birth' , write_date_none(info.patient.birth_date)), + ('Patient Sex' , info.patient.sex), + ('Scan Date' , write_date_none(info.scan_date)), + ('Scanner Manufacturer' , info.scanner.manufacturer), + ('Scanner Model Name' , info.scanner.model), + ('Scanner Serial Number' , info.scanner.serial_number), + ('Scanner Software Version' , info.scanner.software_version), + ('Institution Name' , info.institution), + ('Modality' , info.modality), + ]).write() + + +def write_dicom_files_table(files: list[DicomFile]): + writer = TableWriter() + writer.append_row(['SN', 'FN', 'EN', 'Series', 'md5sum', 'File name']) + for file in files: + writer.append_row([ + file.series_number, + file.file_number, + file.echo_number, + file.series_description, + file.md5_sum, + file.file_name, + ]) + + return '\n' + writer.write() + + +def write_other_files_table(files: list[OtherFile]): + writer = TableWriter() + writer.append_row(['md5sum', 'File name']) + for file in files: + writer.append_row([ + file.md5_sum, + file.file_name, + ]) + + return '\n' + writer.write() + + +def write_acquis_table(acquis: list[Acquisition]): + writer = TableWriter() + writer.append_row([ + 'Series (SN)', + 'Name of series', + 'Seq Name', + 'echoT ms', + 'repT 
ms', + 'invT ms', + 'sth mm', + 'PhEnc', + 'NoF', + 'Series UID', + 'Mod' + ]) + + for acqui in acquis: + writer.append_row([ + acqui.series_number, + acqui.series_description, + acqui.sequence_name, + acqui.echo_time, + acqui.repetition_time, + acqui.inversion_time, + acqui.slice_thickness, + acqui.phase_encoding, + acqui.number_of_files, + acqui.series_uid, + acqui.modality, + ]) + + return '\n' + writer.write() + + +def write_ending(summary: Summary): + birth_date = summary.info.patient.birth_date + scan_date = summary.info.scan_date + + if birth_date and scan_date: + years = scan_date.year - birth_date.year + months = scan_date.month - birth_date.month + days = scan_date.day - birth_date.day + total = round(years + months / 12 + days / 365.0, 2) + age = f'{total} or {years} years, {months} months {days} days' + else: + age = '' + + return '\n' + DictWriter([ + ('Total number of files', len(summary.dicom_files) + len(summary.other_files)), + ('Age at scan', age), + ]).write() diff --git a/python/lib/dicom/text.py b/python/lib/dicom/text.py new file mode 100644 index 000000000..9d2269008 --- /dev/null +++ b/python/lib/dicom/text.py @@ -0,0 +1,79 @@ +""" +A bunch of functions to convert values between (possibly empty) strings and +different types of values. 
+""" + +from datetime import datetime, date +import hashlib +import os + + +def write_value(value: str | int | float | None): + if value is None: + return '' + + return str(value) + + +def write_datetime(datetime: datetime): + return datetime.strftime('%Y-%m-%d %H:%M:%S') + + +def write_date(date: date): + return date.strftime('%Y-%m-%d') + + +def write_date_none(date: date | None): + if date is None: + return None + + return write_date(date) + + +def read_none(string: str): + if string == '': + return None + + return string + + +def read_date_none(string: str | None): + if string is None: + return None + + return datetime.strptime(string, '%Y-%m-%d').date() + + +def read_dicom_date_none(string: str | None): + if string is None: + return None + + return datetime.strptime(string, '%Y%m%d').date() + + +def read_int_none(string: str | None): + if string is None: + return None + + return int(string) + + +def read_float_none(string: str | None): + if string is None: + return None + + return float(string) + + +def make_hash(path: str, with_name: bool = False): + """ + Get the MD5 sum hash of a file, with or without the filename appended. + """ + + with open(path, 'rb') as file: + hash = hashlib.md5(file.read()).hexdigest() + + if with_name: + hash = f'{hash} {os.path.basename(path)}' + + return hash diff --git a/python/lib/dicom/text_dict.py b/python/lib/dicom/text_dict.py new file mode 100644 index 000000000..ac8fc08d7 --- /dev/null +++ b/python/lib/dicom/text_dict.py @@ -0,0 +1,43 @@ +from lib.dicom.text import write_value + + +class DictWriter: + """ + Writer for a text dictionary, i.e, a text of the form: + + Key 1 : Value 1 + Key 2 : Value 2 + ... 
+ """ + + def __init__(self, entries: list[tuple[str, str | int | float | None]]): + self.entries = entries + + def get_keys_length(self): + """ + Get the maximal length of the keys, used for padding + """ + length = 0 + for entry in self.entries: + key = entry[0] + if len(key) > length: + length = len(key) + + return length + + def write(self): + """ + Serialize the text dictionary into a string + """ + + if not self.entries: + return '\n' + + length = self.get_keys_length() + + entries = map( + lambda entry: f'* {entry[0].ljust(length)} : {write_value(entry[1])}\n', + self.entries, + ) + + return ''.join(entries) diff --git a/python/lib/dicom/text_table.py b/python/lib/dicom/text_table.py new file mode 100644 index 000000000..0c3109ca8 --- /dev/null +++ b/python/lib/dicom/text_table.py @@ -0,0 +1,52 @@ +from lib.dicom.text import write_value + + +class TableWriter: + """ + Writer for a text table, i.e, a table of the form: + + Field 1 | Field 2 | Field 3 + Value 1 | Value 2 | Value 3 + Value 4 | Value 5 | Value 6 + ... + """ + + rows: list[list[str]] + + def __init__(self): + self.rows = [] + + def get_cells_lengths(self): + """ + Get the longest value length of each column, used for padding + """ + + lengths = [0] * len(self.rows[0]) + for row in self.rows: + for i in range(len(row)): + if len(row[i]) > lengths[i]: + lengths[i] = len(row[i]) + + return lengths + + def append_row(self, cells: list[str | int | float | None]): + """ + Add a row to the table, which can be either the header or some values. + """ + + self.rows.append(list(map(write_value, cells))) + + def write(self): + """ + Serialize the text table into a string. 
+ """ + + if not self.rows: + return '\n' + + lengths = self.get_cells_lengths() + + rows = map(lambda row: list(map(lambda cell, length: cell.ljust(length), row, lengths)), self.rows) + rows = map(lambda row: ' | '.join(row).rstrip() + '\n', rows) + + return ''.join(rows) diff --git a/python/lib/utilities.py b/python/lib/utilities.py index 8fc8e4055..cf7384b01 100755 --- a/python/lib/utilities.py +++ b/python/lib/utilities.py @@ -86,6 +86,25 @@ def append_to_tsv_file(new_tsv_file, old_tsv_file, key_value_check, verbose): writer.writerow(data) +def get_all_files(dir: str) -> list[str]: + """ + Recursively get the all the files inside a given directory, without including the directories + themselves. The returned paths are relative to the given directory. + """ + + def get_all_files_rec(dir: str, path: str): + if os.path.isdir(dir + '/' + path): + files = [] + for file in os.listdir(dir + '/' + path): + files += get_all_files_rec(dir, path + '/' + file) + + return files + + return [path] + + return get_all_files_rec(dir, '') + + def copy_file(file_orig, file_copy, verbose): """ Copies a file to a new location. 
If something goes wrong during the copy From 535627593452898c3388116cd037c1d0c7723b23 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sat, 8 Feb 2025 21:12:59 +0000 Subject: [PATCH 02/13] update --- pyproject.toml | 7 +- python/dicom_archive.py | 376 ------------------ python/lib/db/queries/session.py | 11 + python/lib/dicom/dicom_database.py | 134 ------- python/lib/dicom/dicom_log.py | 73 ---- python/lib/dicom/summary_make.py | 216 ---------- python/lib/dicom/summary_write.py | 130 ------ .../lib/import_dicom_study/dicom_database.py | 141 +++++++ python/lib/import_dicom_study/import_log.py | 76 ++++ python/lib/import_dicom_study/summary_get.py | 232 +++++++++++ .../summary_type.py | 54 +-- .../lib/import_dicom_study/summary_write.py | 155 ++++++++ .../lib/{dicom => import_dicom_study}/text.py | 2 +- .../text_dict.py | 9 +- .../text_table.py | 6 +- python/lib/util.py | 14 + python/lib/utilities.py | 19 - python/scripts/import_dicom_study.py | 368 +++++++++++++++++ .../summarize_dicom_study.py} | 24 +- 19 files changed, 1054 insertions(+), 993 deletions(-) delete mode 100644 python/dicom_archive.py delete mode 100644 python/lib/dicom/dicom_database.py delete mode 100644 python/lib/dicom/dicom_log.py delete mode 100644 python/lib/dicom/summary_make.py delete mode 100644 python/lib/dicom/summary_write.py create mode 100644 python/lib/import_dicom_study/dicom_database.py create mode 100644 python/lib/import_dicom_study/import_log.py create mode 100644 python/lib/import_dicom_study/summary_get.py rename python/lib/{dicom => import_dicom_study}/summary_type.py (57%) create mode 100644 python/lib/import_dicom_study/summary_write.py rename python/lib/{dicom => import_dicom_study}/text.py (97%) rename python/lib/{dicom => import_dicom_study}/text_dict.py (76%) rename python/lib/{dicom => import_dicom_study}/text_table.py (90%) create mode 100644 python/lib/util.py create mode 100755 python/scripts/import_dicom_study.py rename python/{dicom_summary.py => 
scripts/summarize_dicom_study.py} (59%) mode change 100644 => 100755 diff --git a/pyproject.toml b/pyproject.toml index ca87598a3..84fdf2887 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,12 +17,10 @@ select = ["E", "F", "I", "N", "UP", "W"] # `test` directory. [tool.pyright] include = [ - "python/dicom_archive.py", - "python/dicom_summary.py", "python/tests", "python/lib/db", - "python/lib/dicom", "python/lib/exception", + "python/lib/import_dicom_study", "python/lib/util", "python/lib/config_file.py", "python/lib/env.py", @@ -31,6 +29,9 @@ include = [ "python/lib/make_env.py", "python/lib/scanner.py", "python/lib/validate_subject_info.py", + "python/lib/util.py", + "python/scripts/import_dicom_study.py", + "python/scripts/summarize_dicom_study.py", ] typeCheckingMode = "strict" reportMissingTypeStubs = "none" diff --git a/python/dicom_archive.py b/python/dicom_archive.py deleted file mode 100644 index c6cabe00f..000000000 --- a/python/dicom_archive.py +++ /dev/null @@ -1,376 +0,0 @@ -#!/usr/bin/env python - -from typing import Any, cast -import gzip -import os -import shutil -import sys -import tarfile - -from lib.db.connect import connect_to_db -import lib.dicom.dicom_database -import lib.dicom.dicom_log -import lib.dicom.summary_make -import lib.dicom.summary_write -import lib.dicom.text -import lib.exitcode -from lib.lorisgetopt import LorisGetOpt -import lib.database -from lib.db.model.dicom_archive import DbDicomArchive -from lib.db.query.dicom_archive import try_get_dicom_archive_with_study_uid -from lib.db.query.mri_upload import try_get_mri_upload_with_id -from lib.db.query.session import try_get_session_with_cand_id_visit_label - - -def print_error_exit(message: str, code: int): - print(f'ERROR: {message}', file=sys.stderr) - sys.exit(code) - - -def print_warning(message: str): - print(f'WARNING: {message}', file=sys.stderr) - - -class Args: - profile: str - source: str - target: str - today: bool - year: bool - overwrite: bool - insert: bool - 
update: bool - upload: int | None - session: bool - verbose: bool - - def __init__(self, options_dict: dict[str, Any]): - self.profile = options_dict['profile']['value'] - self.source = options_dict['source']['value'] - self.target = options_dict['target']['value'] - self.today = options_dict['today']['value'] - self.year = options_dict['year']['value'] - self.overwrite = options_dict['overwrite']['value'] - self.insert = options_dict['insert']['value'] - self.update = options_dict['update']['value'] - self.upload = options_dict['upload']['value'] - self.session = options_dict['session']['value'] - self.verbose = options_dict['verbose']['value'] - - -def check_create_file(args: Args, path: str): - if os.path.exists(path): - if args.overwrite: - print_warning(f'Overwriting \'{path}\'') - else: - print_error_exit( - ( - f'File or directory \'{path}\' already exists. ' - 'Use option \'--overwrite\' to overwrite it.' - ), - lib.exitcode.TARGET_EXISTS_NO_CLOBBER, - ) - - -def main(): - usage = ( - "\n" - - "********************************************************************\n" - " DICOM ARCHIVING SCRIPT\n" - "********************************************************************\n" - "The program reads a DICOM directory, processes it into a structured and " - "compressed archive, and insert it or upload it to the LORIS database." 
- - "usage : dicom_archive.py -p -s -t ...\n\n" - - "options: \n" - "\t-p, --profile : Name of the python database config file in dicom-archive/.loris_mri\n" - "\t-s, --source : Source directory containing the DICOM files to archive\n" - "\t-t, --target : Directory in which to place the resulting DICOM archive\n" - "\t --today : Use today's date as the scan date instead of the DICOM scan date\n" - "\t --year : Create the archive in a year subdirectory (example: 2024/DCM_2024-08-27_FooBar.tar)s\n" - "\t --overwrite : Overwrite the DICOM archive file if it already exists\n" - "\t --insert : Insert the created DICOM archive in the database (requires the archive\n" - "\t to not be already inserted)\n" - "\t --update : Update the DICOM archive in the database (requires the archive to be\n" - "\t already be inserted), generally used with --overwrite" - "\t --upload : Associate the DICOM archive with an existing MRI upload in the database, which is\n" - " updated accordingly" - "\t --session : Determine the session for the DICOM archive using the LORIS configuration, and associate\n" - " them accordingly" - "\t-v, --verbose : If set, be verbose\n\n" - - "required options are: \n" - "\t--profile\n" - "\t--source\n" - "\t--target\n\n" - ) - - # NOTE: Some options do not have short options but LorisGetOpt does not support that, so we - # repeat the long names. 
- options_dict = { - "profile": { - "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False - }, - "source": { - "value": None, "required": True, "expect_arg": True, "short_opt": "s", "is_path": True, - }, - "target": { - "value": None, "required": True, "expect_arg": True, "short_opt": "t", "is_path": True, - }, - "today": { - "value": False, "required": False, "expect_arg": False, "short_opt": "today", "is_path": False, - }, - "year": { - "value": False, "required": False, "expect_arg": False, "short_opt": "year", "is_path": False, - }, - "overwrite": { - "value": False, "required": False, "expect_arg": False, "short_opt": "overwrite", "is_path": False, - }, - "insert": { - "value": False, "required": False, "expect_arg": False, "short_opt": "insert", "is_path": False, - }, - "update": { - "value": False, "required": False, "expect_arg": False, "short_opt": "update", "is_path": False, - }, - "upload": { - "value": True, "required": False, "expect_arg": False, "short_opt": "upload", "is_path": False, - }, - "session": { - "value": False, "required": False, "expect_arg": False, "short_opt": "session", "is_path": False, - }, - "verbose": { - "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False - }, - "help": { - "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False - }, - } - - # Get the CLI arguments and connect to the database - - loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) - args = Args(loris_getopt_obj.options_dict) - - config = cast(Any, loris_getopt_obj.config_info).mysql - db = connect_to_db(config.mysql) - old_db = lib.database.Database(config.mysql, args.verbose) - get_subject_ids = None - try: - get_subject_ids = config.get_subject_ids - except AttributeError: - print_error_exit( - 'Config file does not contain a `get_subject_ids` function.', - lib.exitcode.BAD_CONFIG_SETTING, - ) - - # Check arguments - - if 
args.insert and args.update: - print_error_exit( - 'Arguments \'--insert\' and \'--update\' must not be set both at the same time.', - lib.exitcode.INVALID_ARG, - ) - - if not os.path.isdir(args.source) or not os.access(args.source, os.R_OK): - print_error_exit( - 'Argument \'--source\' must be a readable directory path.', - lib.exitcode.INVALID_ARG, - ) - - if not os.path.isdir(args.target) or not os.access(args.target, os.W_OK): - print_error_exit( - 'Argument \'--target\' must be a writable directory path.', - lib.exitcode.INVALID_ARG, - ) - - if (args.session or args.upload is not None) and not (args.insert or args.update): - print_error_exit( - 'Arguments \'--db-insert\' or \'--db-update\' must be set when \'--db-session\' or \'--db-upload\' is set.', - lib.exitcode.INVALID_ARG, - ) - - # Check paths - - base_name = os.path.basename(args.source) - - tar_path = f'{args.target}/{base_name}.tar' - zip_path = f'{args.target}/{base_name}.tar.gz' - summary_path = f'{args.target}/{base_name}.meta' - log_path = f'{args.target}/{base_name}.log' - - check_create_file(args, tar_path) - check_create_file(args, zip_path) - check_create_file(args, summary_path) - check_create_file(args, log_path) - - # Check MRI upload - - mri_upload = None - if args.upload is not None: - mri_upload = try_get_mri_upload_with_id(db, args.upload) - if mri_upload is None: - print_error_exit( - f'No MRI upload found in the database with id {args.upload}.', - lib.exitcode.UPDATE_FAILURE, - ) - - print('Extracting DICOM information (may take a long time)') - - summary = lib.dicom.summary_make.make(args.source, args.verbose) - - print('Checking database presence') - - dicom_archive = try_get_dicom_archive_with_study_uid(db, summary.info.study_uid) - - if args.insert and dicom_archive is not None: - print_error_exit( - ( - f'Study \'{summary.info.study_uid}\' is already inserted in the database\n' - 'Previous archiving log:\n' - f'{dicom_archive.create_info}' - ), - lib.exitcode.INSERT_FAILURE, - ) 
- - if args.update and dicom_archive is None: - print_error_exit( - f'No study \'{summary.info.study_uid}\' found in the database', - lib.exitcode.UPDATE_FAILURE, - ) - - session = None - if args.session: - get_subject_ids = cast(Any, get_subject_ids) - - print('Determine session from configuration') - - ids = get_subject_ids(old_db, summary.info.patient.name) - cand_id = ids['CandID'] - visit_label = ids['visitLabel'] - session = try_get_session_with_cand_id_visit_label(db, cand_id, visit_label) - - if session is None: - print_error_exit( - ( - f'No session found in the database for patient name \'{summary.info.patient.name}\' ' - f'and visit label \'{visit_label}\'.' - ), - lib.exitcode.GET_SESSION_ID_FAILURE, - ) - - print('Copying into DICOM tar') - - with tarfile.open(tar_path, 'w') as tar: - for file in os.listdir(args.source): - tar.add(args.source + '/' + file) - - print('Calculating DICOM tar MD5 sum') - - tarball_md5_sum = lib.dicom.text.make_hash(tar_path, True) - - print('Zipping DICOM tar (may take a long time)') - - with open(tar_path, 'rb') as tar: - # 6 is the default compression level of the tar command, Python's - # default is 9, which is more powerful but also too slow. - with gzip.open(zip_path, 'wb', compresslevel=6) as zip: - shutil.copyfileobj(tar, zip) - - print('Calculating DICOM zip MD5 sum') - - zipball_md5_sum = lib.dicom.text.make_hash(zip_path, True) - - print('Getting DICOM scan date') - - if not args.today and summary.info.scan_date is None: - print_warning(( - 'No scan date was found in the DICOMs, ' - 'consider using argument \'--today\' to use today\'s date as the scan date.' - )) - - if args.year and summary.info.scan_date is None: - print_warning(( - 'Argument \'--year\' was provided but no scan date was found in the DICOMs, ' - 'the argument will be ignored.' 
- )) - - if args.year and summary.info.scan_date is not None: - dir_path = f'{args.target}/{summary.info.scan_date.year}' - if not os.path.exists(dir_path): - print(f'Creating directory \'{dir_path}\'') - os.mkdir(dir_path) - elif not os.path.isdir(dir_path) or not os.access(dir_path, os.W_OK): - print_error_exit( - f'Path \'{dir_path}\' exists but is not a writable directory.', - lib.exitcode.CREATE_DIR_FAILURE, - ) - else: - dir_path = args.target - - if summary.info.scan_date is not None: - scan_date_string = lib.dicom.text.write_date(summary.info.scan_date) - archive_path = f'{dir_path}/DCM_{scan_date_string}_{base_name}.tar' - else: - archive_path = f'{dir_path}/DCM_{base_name}.tar' - - check_create_file(args, archive_path) - - log = lib.dicom.dicom_log.make(args.source, archive_path, tarball_md5_sum, zipball_md5_sum) - - if args.verbose: - print('The archive will be created with the following arguments:') - print(lib.dicom.dicom_log.write_to_string(log)) - - print('Writing summary file') - - lib.dicom.summary_write.write_to_file(summary_path, summary) - - print('Writing log file') - - lib.dicom.dicom_log.write_to_file(log_path, log) - - print('Copying into DICOM archive') - - with tarfile.open(archive_path, 'w') as tar: - tar.add(zip_path, os.path.basename(zip_path)) - tar.add(summary_path, os.path.basename(summary_path)) - tar.add(log_path, os.path.basename(log_path)) - - print('Removing temporary files') - - os.remove(tar_path) - os.remove(zip_path) - os.remove(summary_path) - os.remove(log_path) - - print('Calculating DICOM tar MD5 sum') - - log.archive_md5_sum = lib.dicom.text.make_hash(log.target_path, True) - - if args.insert: - lib.dicom.dicom_database.insert(db, log, summary) - - if args.update: - # Safe because we checked previously that the DICOM archive is not `None` - dicom_archive = cast(DbDicomArchive, dicom_archive) - lib.dicom.dicom_database.update(db, dicom_archive, log, summary) - - if mri_upload is not None: - print('Updating MRI upload in 
the database') - dicom_archive = cast(DbDicomArchive, dicom_archive) - dicom_archive.upload = mri_upload - - if session is not None: - dicom_archive = cast(DbDicomArchive, dicom_archive) - dicom_archive.session = session - - if mri_upload is not None: - mri_upload.session = session - - print('Success') - - -if __name__ == "__main__": - main() diff --git a/python/lib/db/queries/session.py b/python/lib/db/queries/session.py index a32d3cbd7..e0d45086b 100644 --- a/python/lib/db/queries/session.py +++ b/python/lib/db/queries/session.py @@ -1,3 +1,4 @@ + from sqlalchemy import select from sqlalchemy.orm import Session as Database @@ -16,3 +17,13 @@ def try_get_session_with_cand_id_visit_label(db: Database, cand_id: int, visit_l .where(DbSession.visit_label == visit_label) .where(DbCandidate.cand_id == cand_id) ).scalar_one_or_none() + + +def try_get_session_with_id(db: Database, session_id: int): + """ + Get a session from the database using its ID, or return `None` if no session is found. + """ + + return db.execute(select(DbSession) + .where(DbSession.id == session_id) + ).scalar_one_or_none() diff --git a/python/lib/dicom/dicom_database.py b/python/lib/dicom/dicom_database.py deleted file mode 100644 index 8367962ec..000000000 --- a/python/lib/dicom/dicom_database.py +++ /dev/null @@ -1,134 +0,0 @@ -from datetime import datetime -from sqlalchemy.orm import Session as Database -from lib.db.model.dicom_archive import DbDicomArchive -from lib.db.model.dicom_archive_file import DbDicomArchiveFile -from lib.db.model.dicom_archive_series import DbDicomArchiveSeries -from lib.db.query.dicom_archive import delete_dicom_archive_file_series, get_dicom_archive_series_with_file_info -from lib.dicom.summary_type import Summary -from lib.dicom.dicom_log import DicomArchiveLog -import lib.dicom.text -import lib.dicom.summary_write -import lib.dicom.dicom_log - - -def populate_dicom_archive( - dicom_archive: DbDicomArchive, - log: DicomArchiveLog, - summary: Summary, - archive_path: 
str, - session_id: int | None, -): - """ - Populate a DICOM archive with information from its DICOM archiving log and DICOM summary. - - :param dicom_archive: The DICOM archive ORM object to populate. - :param log: The DICOM arching log object. - :param summary: The DICOM summary object. - :param session_id: The optional session ID associated with the DICOM archive. - """ - - dicom_archive.study_uid = summary.info.study_uid - dicom_archive.patient_id = summary.info.patient.id - dicom_archive.patient_name = summary.info.patient.name - dicom_archive.patient_birthdate = summary.info.patient.birth_date - dicom_archive.patient_sex = summary.info.patient.sex - dicom_archive.neuro_db_center_name = None - dicom_archive.center_name = summary.info.institution or '' - dicom_archive.last_update = None - dicom_archive.date_acquired = summary.info.scan_date - dicom_archive.date_last_archived = datetime.now() - dicom_archive.acquisition_count = len(summary.acquis) - dicom_archive.dicom_file_count = len(summary.dicom_files) - dicom_archive.non_dicom_file_count = len(summary.other_files) - dicom_archive.md5_sum_dicom_only = log.tarball_md5_sum - dicom_archive.md5_sum_archive = log.archive_md5_sum - dicom_archive.creating_user = log.creator_name - dicom_archive.sum_type_version = log.summary_version - dicom_archive.tar_type_version = log.archive_version - dicom_archive.source_location = log.source_path - dicom_archive.archive_location = archive_path - dicom_archive.scanner_manufacturer = summary.info.scanner.manufacturer - dicom_archive.scanner_model = summary.info.scanner.model - dicom_archive.scanner_serial_number = summary.info.scanner.serial_number - dicom_archive.scanner_software_version = summary.info.scanner.software_version - dicom_archive.session_id = session_id - dicom_archive.upload_attempt = 0 - dicom_archive.create_info = lib.dicom.dicom_log.write_to_string(log) - dicom_archive.acquisition_metadata = lib.dicom.summary_write.write_to_string(summary) - 
dicom_archive.date_sent = None - dicom_archive.pending_transfer = 0 - - -def insert(db: Database, log: DicomArchiveLog, summary: Summary): - """ - Insert a DICOM archive into the database. - - :param db: The database. - :param log: The archiving log of the DICOM archive. - :param summary: The summary of the DICOM archive. - """ - - dicom_archive = DbDicomArchive() - populate_dicom_archive(dicom_archive, log, summary, 'TODO', None) - dicom_archive.date_first_archived = datetime.now() - db.add(dicom_archive) - insert_files_series(db, dicom_archive, summary) - return dicom_archive - - -def insert_files_series(db: Database, dicom_archive: DbDicomArchive, summary: Summary): - for acqui in summary.acquis: - db.add(DbDicomArchiveSeries( - archive_id = dicom_archive.id, - series_number = acqui.series_number, - series_description = acqui.series_description, - sequence_name = acqui.sequence_name, - echo_time = acqui.echo_time, - repetition_time = acqui.repetition_time, - inversion_time = acqui.inversion_time, - slice_thickness = acqui.slice_thickness, - phase_encoding = acqui.phase_encoding, - number_of_files = acqui.number_of_files, - series_uid = acqui.series_uid, - modality = acqui.modality, - )) - - for file in summary.dicom_files: - series = get_dicom_archive_series_with_file_info( - db, - file.series_uid or '', - file.series_number or 1, - file.echo_time, - file.sequence_name or '', - ) - - db.add(DbDicomArchiveFile( - archive_id = dicom_archive.id, - series_number = file.series_number, - file_number = file.file_number, - echo_number = file.echo_number, - series_description = file.series_description, - md5_sum = file.md5_sum, - file_name = file.file_name, - series_id = series.id, - )) - - -def update(db: Database, dicom_archive: DbDicomArchive, log: DicomArchiveLog, summary: Summary): - """ - Insert a DICOM archive into the database. - - :param db: The database. - :param archive: The DICOM archive to update. - :param log: The archiving log of the DICOM archive. 
- :param summary: The summary of the DICOM archive. - """ - - # Delete the associated database DICOM files and series. - delete_dicom_archive_file_series(db, dicom_archive) - - # Update the database record with the new DICOM information. - populate_dicom_archive(dicom_archive, log, summary, 'TODO', None) - - # Insert the new DICOM files and series. - insert_files_series(db, dicom_archive, summary) diff --git a/python/lib/dicom/dicom_log.py b/python/lib/dicom/dicom_log.py deleted file mode 100644 index 37fc18503..000000000 --- a/python/lib/dicom/dicom_log.py +++ /dev/null @@ -1,73 +0,0 @@ -from dataclasses import dataclass -from datetime import datetime -import os -import socket -from lib.dicom.text_dict import DictWriter - - -@dataclass -class DicomArchiveLog: - """ - DICOM archiving log object, containg information about the archiving of a - DICOM directory. - """ - - source_path: str - target_path: str - creator_host: str - creator_os: str - creator_name: str - archive_date: str - summary_version: int - archive_version: int - tarball_md5_sum: str - zipball_md5_sum: str - archive_md5_sum: str - - -def write_to_string(log: DicomArchiveLog): - """ - Serialize a DICOM archiving log object into a string. - """ - return DictWriter([ - ('Taken from dir' , log.source_path), - ('Archive target location' , log.target_path), - ('Name of creating host' , log.creator_host), - ('Name of host OS' , log.creator_os), - ('Created by user' , log.creator_name), - ('Archived on' , log.archive_date), - ('dicomSummary version' , log.summary_version), - ('dicomTar version' , log.archive_version), - ('md5sum for DICOM tarball' , log.tarball_md5_sum), - ('md5sum for DICOM tarball gzipped' , log.zipball_md5_sum), - ('md5sum for complete archive' , log.archive_md5_sum), - ]).write() - - -def write_to_file(file_path: str, log: DicomArchiveLog): - """ - Serialize a DICOM archiving log object into a text file. 
- """ - string = write_to_string(log) - with open(file_path, 'w') as file: - file.write(string) - - -def make(source: str, target: str, tarball_md5_sum: str, zipball_md5_sum: str): - """ - Create a DICOM archiving log object from the provided arguments on a DICOM - directory, as well as the current execution environment. - """ - return DicomArchiveLog( - source, - target, - socket.gethostname(), - os.uname().sysname, - os.environ['USER'], - datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'), - 2, - 2, - tarball_md5_sum, - zipball_md5_sum, - 'Provided in database only', - ) diff --git a/python/lib/dicom/summary_make.py b/python/lib/dicom/summary_make.py deleted file mode 100644 index 60f289eaf..000000000 --- a/python/lib/dicom/summary_make.py +++ /dev/null @@ -1,216 +0,0 @@ -from functools import cmp_to_key -import os -import pydicom -import pydicom.errors -from lib.dicom.summary_type import Summary, Info, Patient, Scanner, Acquisition, DicomFile, OtherFile -from lib.dicom.text import make_hash, read_dicom_date_none -from lib.utilities import get_all_files - - -def get_value(dicom: pydicom.Dataset, tag: str): - """ - Get a required value from a DICOM. - """ - - if tag not in dicom: - raise Exception(f'Expected DICOM tag \'{tag}\' but found none.') - - return dicom[tag].value - - -def get_value_none(dicom: pydicom.Dataset, tag: str): - """ - Get a nullable value from a DICOM. - """ - - if tag not in dicom: - return None - - return dicom[tag].value or None - - -def cmp_int_none(a: int | None, b: int | None): - """ - Order comparison between two nullable integers. - The returned value is in accordance with `functools.cmp_to_key`. - https://docs.python.org/3/library/functools.html#functools.cmp_to_key - """ - - match a, b: - case None, None: - return 0 - case _, None: - return -1 - case None, _: - return 1 - case a, b: - return a - b - - -def cmp_string_none(a: str | None, b: str | None): - """ - Order comparison between two nullable strings. 
- The returned value is in accordance with `functools.cmp_to_key`. - https://docs.python.org/3/library/functools.html#functools.cmp_to_key - """ - - match a, b: - case None, None: - return 0 - case _, None: - return -1 - case None, _: - return 1 - case a, b if a < b: - return -1 - case a, b if a > b: - return 1 - case a, b: - return 0 - - -def cmp_files(a: DicomFile, b: DicomFile): - """ - Compare the order of two files to sort them in the summary. - """ - - return \ - cmp_int_none(a.series_number, b.series_number) or \ - cmp_int_none(a.file_number, b.file_number) or \ - cmp_int_none(a.echo_number, b.echo_number) - - -def cmp_acquis(a: Acquisition, b: Acquisition): - """ - Compare the order of two acquisitions to sort them in the summary. - """ - - return \ - a.series_number - b.series_number or \ - cmp_string_none(a.sequence_name, b.sequence_name) - - -def make(dir_path: str, verbose: bool): - """ - Create a DICOM summary object from a DICOM directory path. - """ - - info = None - dicom_files: list[DicomFile] = [] - other_files: list[OtherFile] = [] - acquis_dict: dict[tuple[int, int | None, str | None], Acquisition] = dict() - - file_paths = get_all_files(dir_path) - for i, file_path in enumerate(file_paths): - if verbose: - print(f'Processing file \'{file_path}\' ({i + 1}/{len(file_paths)})') - - try: - dicom = pydicom.dcmread(dir_path + '/' + file_path) # type: ignore - if info is None: - info = make_info(dicom) - - dicom_files.append(make_dicom_file(dicom)) - - series = dicom.SeriesNumber - echo = get_value_none(dicom, 'EchoNumbers') - sequence = get_value_none(dicom, 'SequenceName') - - if (series, sequence, echo) not in acquis_dict: - acquis_dict[(series, sequence, echo)] = make_acqui(dicom) - - acquis_dict[(series, sequence, echo)].number_of_files += 1 - except pydicom.errors.InvalidDicomError: - other_files.append(make_other_file(dir_path + '/' + file_path)) - - if info is None: - raise Exception('Found no DICOM file in the directory.') - - acquis = 
list(acquis_dict.values()) - - dicom_files = sorted(dicom_files, key=cmp_to_key(cmp_files)) - acquis = sorted(acquis, key=cmp_to_key(cmp_acquis)) - - return Summary(info, acquis, dicom_files, other_files) - - -def make_info(dicom: pydicom.Dataset): - """ - Create an `Info` object from a DICOM file, containing general information - about the DICOM directory. - """ - - birth_date = read_dicom_date_none(get_value_none(dicom, 'PatientBirthDate')) - scan_date = read_dicom_date_none(get_value_none(dicom, 'StudyDate')) - - patient = Patient( - get_value(dicom, 'PatientID'), - get_value(dicom, 'PatientName'), - get_value_none(dicom, 'PatientSex'), - birth_date, - ) - - scanner = Scanner( - get_value(dicom, 'Manufacturer'), - get_value(dicom, 'ManufacturerModelName'), - get_value(dicom, 'DeviceSerialNumber'), - get_value(dicom, 'SoftwareVersions'), - ) - - return Info( - get_value(dicom, 'StudyInstanceUID'), - patient, - scanner, - scan_date, - get_value_none(dicom, 'InstitutionName'), - get_value(dicom, 'Modality'), - ) - - -def make_dicom_file(dicom: pydicom.Dataset): - """ - Create a `DicomFile` object from a DICOM file, containing information about - this DICOM file. - """ - return DicomFile( - os.path.basename(dicom.filename), - make_hash(dicom.filename), - get_value_none(dicom, 'SeriesNumber'), - get_value_none(dicom, 'SeriesInstanceUID'), - get_value_none(dicom, 'SeriesDescription'), - get_value_none(dicom, 'InstanceNumber'), - get_value_none(dicom, 'EchoNumbers'), - get_value_none(dicom, 'EchoTime'), - get_value_none(dicom, 'SequenceName'), - ) - - -def make_other_file(file_path: str): - """ - Create an `OtherFile` object from a non-DICOM file, containing information - about this file. - """ - return OtherFile( - os.path.basename(file_path), - make_hash(file_path), - ) - - -def make_acqui(dicom: pydicom.Dataset): - """ - Create an `Acquisition` object from a DICOM file, containg information - about a DICOM series. 
- """ - return Acquisition( - get_value(dicom, 'SeriesNumber'), - get_value_none(dicom, 'SeriesInstanceUID'), - get_value_none(dicom, 'SeriesDescription'), - get_value_none(dicom, 'SequenceName'), - get_value_none(dicom, 'EchoTime'), - get_value_none(dicom, 'RepetitionTime'), - get_value_none(dicom, 'InversionTime'), - get_value_none(dicom, 'SliceThickness'), - get_value_none(dicom, 'InPlanePhaseEncodingDirection'), - 0, - get_value_none(dicom, 'Modality'), - ) diff --git a/python/lib/dicom/summary_write.py b/python/lib/dicom/summary_write.py deleted file mode 100644 index d6ba6935b..000000000 --- a/python/lib/dicom/summary_write.py +++ /dev/null @@ -1,130 +0,0 @@ -import xml.etree.ElementTree as ET -from lib.dicom.summary_type import Summary, Info, Acquisition, DicomFile, OtherFile -from lib.dicom.text_dict import DictWriter -from lib.dicom.text_table import TableWriter -from lib.dicom.text import write_date_none - - -def write_to_file(filename: str, summary: Summary): - """ - Serialize a DICOM summary object into a text file. - """ - string = write_to_string(summary) - with open(filename, 'w') as file: - file.write(string) - - -def write_to_string(summary: Summary) -> str: - """ - Serialize a DICOM summary object into a string. 
- """ - return ET.tostring(write_xml(summary), encoding='unicode') + '\n' - - -def write_xml(summary: Summary): - study = ET.Element('STUDY') - ET.SubElement(study, 'STUDY_INFO').text = write_info(summary.info) - ET.SubElement(study, 'FILES').text = write_dicom_files_table(summary.dicom_files) - ET.SubElement(study, 'OTHERS').text = write_other_files_table(summary.other_files) - ET.SubElement(study, 'ACQUISITIONS').text = write_acquis_table(summary.acquis) - ET.SubElement(study, 'SUMMARY').text = write_ending(summary) - ET.indent(study, space='') - return study - - -def write_info(info: Info): - return '\n' + DictWriter([ - ('Unique Study ID' , info.study_uid), - ('Patient Name' , info.patient.name), - ('Patient ID' , info.patient.id), - ('Patient date of birth' , write_date_none(info.patient.birth_date)), - ('Patient Sex' , info.patient.sex), - ('Scan Date' , write_date_none(info.scan_date)), - ('Scanner Manufacturer' , info.scanner.manufacturer), - ('Scanner Model Name' , info.scanner.model), - ('Scanner Serial Number' , info.scanner.serial_number), - ('Scanner Software Version' , info.scanner.software_version), - ('Institution Name' , info.institution), - ('Modality' , info.modality), - ]).write() - - -def write_dicom_files_table(files: list[DicomFile]): - writer = TableWriter() - writer.append_row(['SN', 'FN', 'EN', 'Series', 'md5sum', 'File name']) - for file in files: - writer.append_row([ - file.series_number, - file.file_number, - file.echo_number, - file.series_description, - file.md5_sum, - file.file_name, - ]) - - return '\n' + writer.write() - - -def write_other_files_table(files: list[OtherFile]): - writer = TableWriter() - writer.append_row(['md5sum', 'File name']) - for file in files: - writer.append_row([ - file.md5_sum, - file.file_name, - ]) - - return '\n' + writer.write() - - -def write_acquis_table(acquis: list[Acquisition]): - writer = TableWriter() - writer.append_row([ - 'Series (SN)', - 'Name of series', - 'Seq Name', - 'echoT ms', - 'repT 
ms', - 'invT ms', - 'sth mm', - 'PhEnc', - 'NoF', - 'Series UID', - 'Mod' - ]) - - for acqui in acquis: - writer.append_row([ - acqui.series_number, - acqui.series_description, - acqui.sequence_name, - acqui.echo_time, - acqui.repetition_time, - acqui.inversion_time, - acqui.slice_thickness, - acqui.phase_encoding, - acqui.number_of_files, - acqui.series_uid, - acqui.modality, - ]) - - return '\n' + writer.write() - - -def write_ending(summary: Summary): - birth_date = summary.info.patient.birth_date - scan_date = summary.info.scan_date - - if birth_date and scan_date: - years = scan_date.year - birth_date.year - months = scan_date.month - birth_date.month - days = scan_date.day - birth_date.day - total = round(years + months / 12 + days / 365.0, 2) - age = f'{total} or {years} years, {months} months {days} days' - else: - age = '' - - return '\n' + DictWriter([ - ('Total number of files', len(summary.dicom_files) + len(summary.other_files)), - ('Age at scan', age), - ]).write() diff --git a/python/lib/import_dicom_study/dicom_database.py b/python/lib/import_dicom_study/dicom_database.py new file mode 100644 index 000000000..d0a671495 --- /dev/null +++ b/python/lib/import_dicom_study/dicom_database.py @@ -0,0 +1,141 @@ +from datetime import datetime + +from sqlalchemy.orm import Session as Database + +from lib.db.models.dicom_archive import DbDicomArchive +from lib.db.models.dicom_archive_file import DbDicomArchiveFile +from lib.db.models.dicom_archive_series import DbDicomArchiveSeries +from lib.db.queries.dicom_archive import delete_dicom_archive_file_series, get_dicom_archive_series_with_file_info +from lib.import_dicom_study.import_log import DicomStudyImportLog, write_dicom_study_import_log_to_string +from lib.import_dicom_study.summary_type import DicomStudySummary +from lib.import_dicom_study.summary_write import write_dicom_study_summary + + +def insert_dicom_archive( + db: Database, + dicom_summary: DicomStudySummary, + dicom_import_log: 
DicomStudyImportLog, + archive_location: str, +): + """ + Insert a DICOM archive in the database. + """ + + dicom_archive = DbDicomArchive() + populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_location) + dicom_archive.date_first_archived = datetime.now() + db.add(dicom_archive) + db.commit() + insert_files_series(db, dicom_archive, dicom_summary) + return dicom_archive + + +def update_dicom_archive( + db: Database, + dicom_archive: DbDicomArchive, + dicom_summary: DicomStudySummary, + dicom_import_log: DicomStudyImportLog, + archive_location: str, +): + """ + Update a DICOM archive in the database. + """ + + # Delete the associated database DICOM files and series. + delete_dicom_archive_file_series(db, dicom_archive) + + # Update the database record with the new DICOM information. + populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_location) + db.commit() + + # Insert the new DICOM files and series. + insert_files_series(db, dicom_archive, dicom_summary) + + +def populate_dicom_archive( + dicom_archive: DbDicomArchive, + dicom_summary: DicomStudySummary, + dicom_import_log: DicomStudyImportLog, + archive_location: str, +): + """ + Populate a DICOM archive database object with information from its DICOM summary and DICOM + study import log. 
+ """ + + dicom_archive.study_uid = dicom_summary.info.study_uid + dicom_archive.patient_id = dicom_summary.info.patient.id + dicom_archive.patient_name = dicom_summary.info.patient.name + dicom_archive.patient_birthdate = dicom_summary.info.patient.birth_date + dicom_archive.patient_sex = dicom_summary.info.patient.sex + dicom_archive.neuro_db_center_name = None + dicom_archive.center_name = dicom_summary.info.institution or '' + dicom_archive.last_update = None + dicom_archive.date_acquired = dicom_summary.info.scan_date + dicom_archive.date_last_archived = datetime.now() + dicom_archive.acquisition_count = len(dicom_summary.acquisitions) + dicom_archive.dicom_file_count = len(dicom_summary.dicom_files) + dicom_archive.non_dicom_file_count = len(dicom_summary.other_files) + dicom_archive.md5_sum_dicom_only = dicom_import_log.tarball_md5_sum + dicom_archive.md5_sum_archive = dicom_import_log.archive_md5_sum + dicom_archive.creating_user = dicom_import_log.creator_name + dicom_archive.sum_type_version = dicom_import_log.summary_version + dicom_archive.tar_type_version = dicom_import_log.archive_version + dicom_archive.source_location = dicom_import_log.source_path + dicom_archive.archive_location = archive_location + dicom_archive.scanner_manufacturer = dicom_summary.info.scanner.manufacturer + dicom_archive.scanner_model = dicom_summary.info.scanner.model + dicom_archive.scanner_serial_number = dicom_summary.info.scanner.serial_number + dicom_archive.scanner_software_version = dicom_summary.info.scanner.software_version + dicom_archive.session_id = None + dicom_archive.upload_attempt = 0 + dicom_archive.create_info = write_dicom_study_import_log_to_string(dicom_import_log) + dicom_archive.acquisition_metadata = write_dicom_study_summary(dicom_summary) + dicom_archive.date_sent = None + dicom_archive.pending_transfer = False + + +def insert_files_series(db: Database, dicom_archive: DbDicomArchive, dicom_summary: DicomStudySummary): + """ + Insert the DICOM files 
and series related to a DICOM archive in the database. + """ + + for acquisition in dicom_summary.acquisitions: + db.add(DbDicomArchiveSeries( + archive_id = dicom_archive.id, + series_number = acquisition.series_number, + series_description = acquisition.series_description, + sequence_name = acquisition.sequence_name, + echo_time = acquisition.echo_time, + repetition_time = acquisition.repetition_time, + inversion_time = acquisition.inversion_time, + slice_thickness = acquisition.slice_thickness, + phase_encoding = acquisition.phase_encoding, + number_of_files = acquisition.number_of_files, + series_uid = acquisition.series_uid, + modality = acquisition.modality, + )) + + db.commit() + + for dicom_file in dicom_summary.dicom_files: + dicom_series = get_dicom_archive_series_with_file_info( + db, + dicom_file.series_uid or '', + dicom_file.series_number or 1, + dicom_file.echo_time, + dicom_file.sequence_name, + ) + + db.add(DbDicomArchiveFile( + archive_id = dicom_archive.id, + series_number = dicom_file.series_number, + file_number = dicom_file.file_number, + echo_number = dicom_file.echo_number, + series_description = dicom_file.series_description, + md5_sum = dicom_file.md5_sum, + file_name = dicom_file.file_name, + series_id = dicom_series.id, + )) + + db.commit() diff --git a/python/lib/import_dicom_study/import_log.py b/python/lib/import_dicom_study/import_log.py new file mode 100644 index 000000000..2b88972cd --- /dev/null +++ b/python/lib/import_dicom_study/import_log.py @@ -0,0 +1,76 @@ +import os +import socket +from dataclasses import dataclass +from datetime import datetime + +from lib.import_dicom_study.text_dict import DictWriter + + +@dataclass +class DicomStudyImportLog: + """ + Information about the past import of a DICOM study. 
+ """ + + source_path: str + target_path: str + creator_host: str + creator_os: str + creator_name: str + archive_date: str + summary_version: int + archive_version: int + tarball_md5_sum: str + zipball_md5_sum: str + archive_md5_sum: str + + +def write_dicom_study_import_log_to_string(import_log: DicomStudyImportLog): + """ + Serialize a DICOM study import log into a string. + """ + + return DictWriter([ + ("Taken from dir", import_log.source_path), + ("Archive target location", import_log.target_path), + ("Name of creating host", import_log.creator_host), + ("Name of host OS", import_log.creator_os), + ("Created by user", import_log.creator_name), + ("Archived on", import_log.archive_date), + ("dicomSummary version", import_log.summary_version), + ("dicomTar version", import_log.archive_version), + ("md5sum for DICOM tarball", import_log.tarball_md5_sum), + ("md5sum for DICOM tarball gzipped", import_log.zipball_md5_sum), + ("md5sum for complete archive", import_log.archive_md5_sum), + ]).write() + + +def write_dicom_study_import_log_to_file(import_log: DicomStudyImportLog, file_path: str): + """ + Serialize a DICOM study import log into a text file. + """ + + string = write_dicom_study_import_log_to_string(import_log) + with open(file_path, "w") as file: + file.write(string) + + +def make_dicom_study_import_log(source: str, target: str, tarball_md5_sum: str, zipball_md5_sum: str): + """ + Create a DICOM study import log from the provided arguments about a DICOM study, as well as the + current execution environment. 
+ """ + + return DicomStudyImportLog( + source, + target, + socket.gethostname(), + os.uname().sysname, + os.environ["USER"], + datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S"), + 2, + 2, + tarball_md5_sum, + zipball_md5_sum, + "Provided in database only", + ) diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py new file mode 100644 index 000000000..b3233629c --- /dev/null +++ b/python/lib/import_dicom_study/summary_get.py @@ -0,0 +1,232 @@ +import os +from functools import cmp_to_key + +import pydicom +import pydicom.errors + +from lib.import_dicom_study.summary_type import ( + DicomStudyAcquisition, + DicomStudyAcquisitionKey, + DicomStudyDicomFile, + DicomStudyInfo, + DicomStudyOtherFile, + DicomStudyPatient, + DicomStudyScanner, + DicomStudySummary, +) +from lib.import_dicom_study.text import make_hash, read_dicom_date_none +from lib.util import iter_all_files + + +def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): + """ + Get information about a DICOM study by reading the files in the DICOM study directory. 
+ """ + + study_info = None + dicom_files: list[DicomStudyDicomFile] = [] + other_files: list[DicomStudyOtherFile] = [] + acquisitions_dict: dict[DicomStudyAcquisitionKey, DicomStudyAcquisition] = dict() + + file_rel_paths = list(iter_all_files(dicom_study_dir_path)) + for i, file_rel_path in enumerate(file_rel_paths, start=1): + if verbose: + print(f"Processing file '{file_rel_path}' ({i}/{len(file_rel_paths)})") + + file_path = os.path.join(dicom_study_dir_path, file_rel_path) + + try: + dicom = pydicom.dcmread(file_path) # type: ignore + if study_info is None: + study_info = get_dicom_study_info(dicom) + + dicom_files.append(get_dicom_file_info(dicom)) + + acquisition_key = DicomStudyAcquisitionKey( + series_number = dicom.SeriesNumber, + echo_numbers = read_value_none(dicom, 'EchoNumbers'), + sequence_name = read_value_none(dicom, 'SequenceName'), + ) + + if acquisition_key not in acquisitions_dict: + acquisitions_dict[acquisition_key] = get_acquisition_info(dicom) + + acquisitions_dict[acquisition_key].number_of_files += 1 + except pydicom.errors.InvalidDicomError: + other_files.append(get_other_file_info(file_path)) + + if study_info is None: + raise Exception("Found no DICOM file in the DICOM study directory.") + + acquisitions = list(acquisitions_dict.values()) + + dicom_files.sort(key=cmp_to_key(compare_dicom_files)) + acquisitions.sort(key=cmp_to_key(compare_acquisitions)) + + return DicomStudySummary(study_info, acquisitions, dicom_files, other_files) + + +def get_dicom_study_info(dicom: pydicom.Dataset) -> DicomStudyInfo: + """ + Get general information about a DICOM study from one of its DICOM files. 
+ """ + + birth_date = read_dicom_date_none(read_value_none(dicom, 'PatientBirthDate')) + scan_date = read_dicom_date_none(read_value_none(dicom, 'StudyDate')) + + patient = DicomStudyPatient( + read_value(dicom, 'PatientID'), + read_value(dicom, 'PatientName'), + read_value_none(dicom, 'PatientSex'), + birth_date, + ) + + scanner = DicomStudyScanner( + read_value(dicom, 'Manufacturer'), + read_value(dicom, 'ManufacturerModelName'), + read_value(dicom, 'DeviceSerialNumber'), + read_value(dicom, 'SoftwareVersions'), + ) + + return DicomStudyInfo( + read_value(dicom, 'StudyInstanceUID'), + patient, + scanner, + scan_date, + read_value_none(dicom, 'InstitutionName'), + read_value(dicom, 'Modality'), + ) + + +def get_dicom_file_info(dicom: pydicom.Dataset) -> DicomStudyDicomFile: + """ + Get information about a DICOM file within a DICOM study. + """ + + return DicomStudyDicomFile( + os.path.basename(dicom.filename), + make_hash(dicom.filename), + read_value_none(dicom, 'SeriesNumber'), + read_value_none(dicom, 'SeriesInstanceUID'), + read_value_none(dicom, 'SeriesDescription'), + read_value_none(dicom, 'InstanceNumber'), + read_value_none(dicom, 'EchoNumbers'), + read_value_none(dicom, 'EchoTime'), + read_value_none(dicom, 'SequenceName'), + ) + + +def get_other_file_info(file_path: str) -> DicomStudyOtherFile: + """ + Get information about a non-DICOM file within a DICOM study. + """ + + return DicomStudyOtherFile( + os.path.basename(file_path), + make_hash(file_path), + ) + + +def get_acquisition_info(dicom: pydicom.Dataset): + """ + Get information about an acquisition within a DICOM study. 
+ """ + + return DicomStudyAcquisition( + read_value(dicom, 'SeriesNumber'), + read_value_none(dicom, 'SeriesInstanceUID'), + read_value_none(dicom, 'SeriesDescription'), + read_value_none(dicom, 'SequenceName'), + read_value_none(dicom, 'EchoTime'), + read_value_none(dicom, 'RepetitionTime'), + read_value_none(dicom, 'InversionTime'), + read_value_none(dicom, 'SliceThickness'), + read_value_none(dicom, 'InPlanePhaseEncodingDirection'), + 0, + read_value_none(dicom, 'Modality'), + ) + + +# Read DICOM attributes. + +def read_value(dicom: pydicom.Dataset, tag: str): + """ + Read a DICOM attribute from a DICOM using a given tag, or raise an exception if there is no + attribute with that tag in the DICOM. + """ + + if tag not in dicom: + raise Exception(f"Expected DICOM tag '{tag}' but found none.") + + return dicom[tag].value + + +def read_value_none(dicom: pydicom.Dataset, tag: str): + """ + Read a DICOM attribute from a DICOM using a given tag, or return `None` if there is no + attribute with that tag in the DICOM. + """ + + if tag not in dicom: + return None + + return dicom[tag].value or None + + +# Comparison functions used to sort the various DICOM study information objects. + +def compare_dicom_files(a: DicomStudyDicomFile, b: DicomStudyDicomFile): + """ + Compare two DICOM file informations in accordance with `functools.cmp_to_key`. + """ + + return \ + compare_int_none(a.series_number, b.series_number) or \ + compare_int_none(a.file_number, b.file_number) or \ + compare_int_none(a.echo_number, b.echo_number) + + +def compare_acquisitions(a: DicomStudyAcquisition, b: DicomStudyAcquisition): + """ + Compare two acquisition informations in accordance with `functools.cmp_to_key`. + """ + + return \ + a.series_number - b.series_number or \ + compare_string_none(a.sequence_name, b.sequence_name) + + +def compare_int_none(a: int | None, b: int | None): + """ + Compare two nullable integers in accordance with `functools.cmp_to_key`. 
+ """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b: + return a - b + + +def compare_string_none(a: str | None, b: str | None): + """ + Compare two nullable strings in accordance with `functools.cmp_to_key`. + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b if a < b: + return -1 + case a, b if a > b: + return 1 + case a, b: + return 0 diff --git a/python/lib/dicom/summary_type.py b/python/lib/import_dicom_study/summary_type.py similarity index 57% rename from python/lib/dicom/summary_type.py rename to python/lib/import_dicom_study/summary_type.py index 6bf724b8e..6deeec51c 100644 --- a/python/lib/dicom/summary_type.py +++ b/python/lib/import_dicom_study/summary_type.py @@ -3,9 +3,9 @@ @dataclass -class Patient: +class DicomStudyPatient: """ - DICOM patient object, which contains information about a DICOM patient. + Information about a DICOM study patient. """ id: str @@ -15,9 +15,9 @@ class Patient: @dataclass -class Scanner: +class DicomStudyScanner: """ - DICOM scanner object, which contains information about a DICOM scanner. + Information about a DICOM study scanner. """ manufacturer: str @@ -27,25 +27,23 @@ class Scanner: @dataclass -class Info: +class DicomStudyInfo: """ - General DICOM information object, which contains general information about - a DICOM directory. + General information about a DICOM study. """ study_uid: str - patient: Patient - scanner: Scanner + patient: DicomStudyPatient + scanner: DicomStudyScanner scan_date: date | None institution: str | None modality: str @dataclass -class DicomFile: +class DicomStudyDicomFile: """ - DICOM file object, which contains information about a DICOM file inside a - DICOM directory. + Information about a DICOM file within a DICOM sutdy. 
""" file_name: str @@ -60,10 +58,9 @@ class DicomFile: @dataclass -class OtherFile: +class DicomStudyOtherFile: """ - Non-DICOM file object, which contains information about a non-DICOM file - inside a DICOM directory. + Information about a non-DICOM file within a DICOM study. """ file_name: str @@ -71,9 +68,9 @@ class OtherFile: @dataclass -class Acquisition: +class DicomStudyAcquisition: """ - DICOM acquisition object, which contains information about a DICOM series. + Information about an acquisition within a DICOM study. """ series_number: int @@ -89,13 +86,24 @@ class Acquisition: modality: str | None +@dataclass(frozen=True) +class DicomStudyAcquisitionKey: + """ + Identifying information about an acquisition within a DICOM study. + """ + + series_number: int + echo_numbers: str | None + sequence_name: str | None + + @dataclass -class Summary: +class DicomStudySummary: """ - DICOM summary object, which contains information about a DICOM directory. + Information about a DICOM study and its files. 
""" - info: Info - acquis: list[Acquisition] - dicom_files: list[DicomFile] - other_files: list[OtherFile] + info: DicomStudyInfo + acquisitions: list[DicomStudyAcquisition] + dicom_files: list[DicomStudyDicomFile] + other_files: list[DicomStudyOtherFile] diff --git a/python/lib/import_dicom_study/summary_write.py b/python/lib/import_dicom_study/summary_write.py new file mode 100644 index 000000000..f4b2d575e --- /dev/null +++ b/python/lib/import_dicom_study/summary_write.py @@ -0,0 +1,155 @@ +import xml.etree.ElementTree as ET + +from lib.import_dicom_study.summary_type import ( + DicomStudyAcquisition, + DicomStudyDicomFile, + DicomStudyInfo, + DicomStudyOtherFile, + DicomStudySummary, +) +from lib.import_dicom_study.text import write_date_none +from lib.import_dicom_study.text_dict import DictWriter +from lib.import_dicom_study.text_table import TableWriter + + +def write_dicom_study_summary_to_file(dicom_summary: DicomStudySummary, filename: str): + """ + Serialize a DICOM study summary object into a text file. + """ + + string = write_dicom_study_summary(dicom_summary) + with open(filename, 'w') as file: + file.write(string) + + +def write_dicom_study_summary(dicom_summary: DicomStudySummary) -> str: + """ + Serialize a DICOM study summary object into a string. + """ + + xml = ET.Element('STUDY') + ET.SubElement(xml, 'STUDY_INFO').text = write_dicom_study_info(dicom_summary.info) + ET.SubElement(xml, 'FILES').text = write_dicom_study_dicom_files(dicom_summary.dicom_files) + ET.SubElement(xml, 'OTHERS').text = write_dicom_study_other_files(dicom_summary.other_files) + ET.SubElement(xml, 'ACQUISITIONS').text = write_dicom_study_acquisitions(dicom_summary.acquisitions) + ET.SubElement(xml, 'SUMMARY').text = write_dicom_study_ending(dicom_summary) + ET.indent(xml, space='') + return ET.tostring(xml, encoding='unicode') + '\n' + + +def write_dicom_study_info(info: DicomStudyInfo) -> str: + """ + Serialize general information about a DICOM study. 
+ """ + + return '\n' + DictWriter([ + ('Unique Study ID' , info.study_uid), + ('Patient Name' , info.patient.name), + ('Patient ID' , info.patient.id), + ('Patient date of birth' , write_date_none(info.patient.birth_date)), + ('Patient Sex' , info.patient.sex), + ('Scan Date' , write_date_none(info.scan_date)), + ('Scanner Manufacturer' , info.scanner.manufacturer), + ('Scanner Model Name' , info.scanner.model), + ('Scanner Serial Number' , info.scanner.serial_number), + ('Scanner Software Version' , info.scanner.software_version), + ('Institution Name' , info.institution), + ('Modality' , info.modality), + ]).write() + + +def write_dicom_study_dicom_files(dicom_files: list[DicomStudyDicomFile]) -> str: + """ + Serialize information about the DICOM files of a DICOM study into a table. + """ + + writer = TableWriter() + writer.append_row(['SN', 'FN', 'EN', 'Series', 'md5sum', 'File name']) + for dicom_file in dicom_files: + writer.append_row([ + dicom_file.series_number, + dicom_file.file_number, + dicom_file.echo_number, + dicom_file.series_description, + dicom_file.md5_sum, + dicom_file.file_name, + ]) + + return '\n' + writer.write() + + +def write_dicom_study_other_files(other_files: list[DicomStudyOtherFile]) -> str: + """ + Serialize information about the non-DICOM files of a DICOM study into a table. + """ + + writer = TableWriter() + writer.append_row(['md5sum', 'File name']) + for other_file in other_files: + writer.append_row([ + other_file.md5_sum, + other_file.file_name, + ]) + + return '\n' + writer.write() + + +def write_dicom_study_acquisitions(acquisitions: list[DicomStudyAcquisition]) -> str: + """ + Serialize information about the acquisitions of a DICOM study into a table. 
+ """ + + writer = TableWriter() + writer.append_row([ + 'Series (SN)', + 'Name of series', + 'Seq Name', + 'echoT ms', + 'repT ms', + 'invT ms', + 'sth mm', + 'PhEnc', + 'NoF', + 'Series UID', + 'Mod' + ]) + + for acquisition in acquisitions: + writer.append_row([ + acquisition.series_number, + acquisition.series_description, + acquisition.sequence_name, + acquisition.echo_time, + acquisition.repetition_time, + acquisition.inversion_time, + acquisition.slice_thickness, + acquisition.phase_encoding, + acquisition.number_of_files, + acquisition.series_uid, + acquisition.modality, + ]) + + return '\n' + writer.write() + + +def write_dicom_study_ending(dicom_summary: DicomStudySummary) -> str: + """ + Serialize some additional information about a DICOM study. + """ + + birth_date = dicom_summary.info.patient.birth_date + scan_date = dicom_summary.info.scan_date + + if birth_date and scan_date: + years = scan_date.year - birth_date.year + months = scan_date.month - birth_date.month + days = scan_date.day - birth_date.day + total = round(years + months / 12 + days / 365.0, 2) + age = f'{total} or {years} years, {months} months {days} days' + else: + age = '' + + return '\n' + DictWriter([ + ('Total number of files', len(dicom_summary.dicom_files) + len(dicom_summary.other_files)), + ('Age at scan', age), + ]).write() diff --git a/python/lib/dicom/text.py b/python/lib/import_dicom_study/text.py similarity index 97% rename from python/lib/dicom/text.py rename to python/lib/import_dicom_study/text.py index 9d2269008..2296ed541 100644 --- a/python/lib/dicom/text.py +++ b/python/lib/import_dicom_study/text.py @@ -3,9 +3,9 @@ different types of values. 
""" -from datetime import datetime, date import hashlib import os +from datetime import date, datetime def write_value(value: str | int | float | None): diff --git a/python/lib/dicom/text_dict.py b/python/lib/import_dicom_study/text_dict.py similarity index 76% rename from python/lib/dicom/text_dict.py rename to python/lib/import_dicom_study/text_dict.py index ac8fc08d7..91ea94088 100644 --- a/python/lib/dicom/text_dict.py +++ b/python/lib/import_dicom_study/text_dict.py @@ -1,9 +1,9 @@ -from lib.dicom.text import write_value +from lib.import_dicom_study.text import write_value class DictWriter: """ - Writer for a text dictionary, i.e, a text of the form: + Writer for a text dictionary, that is, a text of the form: Key 1 : Value 1 Key 2 : Value 2 @@ -15,8 +15,9 @@ def __init__(self, entries: list[tuple[str, str | int | float | None]]): def get_keys_length(self): """ - Get the maximal length of the keys, used for padding + Get the maximal length of the keys, used for padding. """ + length = 0 for entry in self.entries: key = entry[0] @@ -27,7 +28,7 @@ def get_keys_length(self): def write(self): """ - Serialize the text dictionary into a string + Serialize the text dictionary into a string. 
""" if not self.entries: diff --git a/python/lib/dicom/text_table.py b/python/lib/import_dicom_study/text_table.py similarity index 90% rename from python/lib/dicom/text_table.py rename to python/lib/import_dicom_study/text_table.py index 0c3109ca8..7ffbface4 100644 --- a/python/lib/dicom/text_table.py +++ b/python/lib/import_dicom_study/text_table.py @@ -1,9 +1,9 @@ -from lib.dicom.text import write_value +from lib.import_dicom_study.text import write_value class TableWriter: """ - Writer for a text table, i.e, a table of the form: + Writer for a text table, that is, a table of the form: Field 1 | Field 2 | Field 3 Value 1 | Value 2 | Value 3 @@ -18,7 +18,7 @@ def __init__(self): def get_cells_lengths(self): """ - Get the longest value length of each column, used for padding + Get the longest value length of each column, used for padding. """ lengths = [0] * len(self.rows[0]) diff --git a/python/lib/util.py b/python/lib/util.py new file mode 100644 index 000000000..296423e02 --- /dev/null +++ b/python/lib/util.py @@ -0,0 +1,14 @@ +import os +from collections.abc import Generator + + +def iter_all_files(dir_path: str) -> Generator[str, None, None]: + """ + Iterate through all the files in a directory recursively, and yield the path of each file + relative to that directory. + """ + + for sub_dir_path, _, file_names in os.walk(dir_path): + for file_name in file_names: + file_path = os.path.join(sub_dir_path, file_name) + yield os.path.relpath(file_path, start=dir_path) diff --git a/python/lib/utilities.py b/python/lib/utilities.py index cf7384b01..8fc8e4055 100755 --- a/python/lib/utilities.py +++ b/python/lib/utilities.py @@ -86,25 +86,6 @@ def append_to_tsv_file(new_tsv_file, old_tsv_file, key_value_check, verbose): writer.writerow(data) -def get_all_files(dir: str) -> list[str]: - """ - Recursively get the all the files inside a given directory, without including the directories - themselves. The returned paths are relative to the given directory. 
- """ - - def get_all_files_rec(dir: str, path: str): - if os.path.isdir(dir + '/' + path): - files = [] - for file in os.listdir(dir + '/' + path): - files += get_all_files_rec(dir, path + '/' + file) - - return files - - return [path] - - return get_all_files_rec(dir, '') - - def copy_file(file_orig, file_copy, verbose): """ Copies a file to a new location. If something goes wrong during the copy diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py new file mode 100755 index 000000000..fad1e2db1 --- /dev/null +++ b/python/scripts/import_dicom_study.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python + +import gzip +import os +import shutil +import tarfile +import tempfile +from typing import Any, cast + +import lib.database +import lib.exitcode +import lib.import_dicom_study.text +from lib.db.models.dicom_archive import DbDicomArchive +from lib.db.queries.dicom_archive import try_get_dicom_archive_with_study_uid +from lib.db.queries.mri_upload import try_get_mri_upload_with_id +from lib.db.queries.session import try_get_session_with_id +from lib.import_dicom_study.dicom_database import insert_dicom_archive, update_dicom_archive +from lib.import_dicom_study.import_log import ( + make_dicom_study_import_log, + write_dicom_study_import_log_to_file, + write_dicom_study_import_log_to_string, +) +from lib.import_dicom_study.summary_get import get_dicom_study_summary +from lib.import_dicom_study.summary_write import write_dicom_study_summary_to_file +from lib.logging import log, log_error_exit, log_warning +from lib.lorisgetopt import LorisGetOpt +from lib.make_env import make_env +from lib.util import iter_all_files + + +class Args: + profile: str + source: str + target: str + insert: bool + update: bool + upload: int | None + session: int | None + overwrite: bool + year: bool + verbose: bool + + def __init__(self, options_dict: dict[str, Any]): + self.profile = options_dict['profile']['value'] + self.source = 
os.path.normpath(options_dict['source']['value']) + self.target = os.path.normpath(options_dict['target']['value']) + self.year = options_dict['year']['value'] + self.overwrite = options_dict['overwrite']['value'] + self.insert = options_dict['insert']['value'] + self.update = options_dict['update']['value'] + self.upload = options_dict['upload']['value'] + self.session = options_dict['session']['value'] + self.verbose = options_dict['verbose']['value'] + + +def main() -> None: + usage = ( + "\n" + "********************************************************************\n" + " DICOM STUDY IMPORT SCRIPT\n" + "********************************************************************\n" + "This script reads a directory containing the DICOM files of a study, processes the\n" + "directory into a structured and compressed archive, and inserts or uploads the study\n" + "into the LORIS database.\n" + "\n" + "Usage: import_dicom_study.py -p -s -t ...\n" + "\n" + "Options: \n" + "\t-p, --profile : Name of the LORIS Python configuration file (usually\n" + "\t 'database_config.py')\n" + "\t-s, --source : Path of the source directory containing the DICOM files of the" + "\t study.\n" + "\t-t, --target : Path of the directory in which to create the resulting DICOM\n" + "\t archive\n" + "\t --overwrite : Overwrite the DICOM archive file if it already exists.\n" + "\t --year : Create the archive in a year subdirectory according to its scan\n" + "\t date.\n" + "\t --insert : Insert the created DICOM archive in the database (requires the archive\n" + "\t to not be already inserted).\n" + "\t --update : Update the DICOM archive in the database (requires the archive to be\n" + "\t already be inserted), generally used with '--overwrite'.\n" + "\t --upload : Associate the DICOM study with an existing MRI upload.\n" + "\t --session : Associate the DICOM study with an existing session.\n" + "\t-v, --verbose : If set, be verbose\n" + "\n" + "Required options: \n" + "\t--profile\n" + "\t--source\n" 
+ "\t--target\n" + ) + + # NOTE: Some options do not have short options but LorisGetOpt does not support that, so we + # repeat the long names. + options_dict = { + "profile": { + "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False + }, + "source": { + "value": None, "required": True, "expect_arg": True, "short_opt": "s", "is_path": True, + }, + "target": { + "value": None, "required": True, "expect_arg": True, "short_opt": "t", "is_path": True, + }, + "year": { + "value": False, "required": False, "expect_arg": False, "short_opt": "year", "is_path": False, + }, + "overwrite": { + "value": False, "required": False, "expect_arg": False, "short_opt": "overwrite", "is_path": False, + }, + "insert": { + "value": False, "required": False, "expect_arg": False, "short_opt": "insert", "is_path": False, + }, + "update": { + "value": False, "required": False, "expect_arg": False, "short_opt": "update", "is_path": False, + }, + "upload": { + "value": None, "required": False, "expect_arg": True, "short_opt": "upload", "is_path": False, + }, + "session": { + "value": None, "required": False, "expect_arg": True, "short_opt": "session", "is_path": False, + }, + "verbose": { + "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False + }, + "help": { + "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False + }, + } + + # Get the CLI arguments and connect to the database. + + loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + env = make_env(loris_getopt_obj) + args = Args(loris_getopt_obj.options_dict) + + # Check arguments. 
+ + if not os.path.isdir(args.source) or not os.access(args.source, os.R_OK): + log_error_exit( + env, + "Argument '--source' must be a readable directory path.", + lib.exitcode.INVALID_ARG, + ) + + if not os.path.isdir(args.target) or not os.access(args.target, os.W_OK): + log_error_exit( + env, + "Argument '--target' must be a writable directory path.", + lib.exitcode.INVALID_ARG, + ) + + if args.insert and args.update: + log_error_exit( + env, + "Arguments '--insert' and '--update' cannot be used both at the same time.", + lib.exitcode.INVALID_ARG, + ) + + if (args.session is not None or args.upload is not None) and not (args.insert or args.update): + log_error_exit( + env, + "Argument '--insert' or '--update' must be used when '--upload' or '--session' is used.", + lib.exitcode.INVALID_ARG, + ) + + # Utility variables. + + dicom_study_name = os.path.basename(args.source) + + # Check MRI upload. + + mri_upload = None + if args.upload is not None: + mri_upload = try_get_mri_upload_with_id(env.db, args.upload) + if mri_upload is None: + log_error_exit( + env, + f"No MRI upload found in LORIS with ID {args.upload}.", + lib.exitcode.UPDATE_FAILURE, + ) + + session = None + if args.session is not None: + session = try_get_session_with_id(env.db, args.session) + if session is None: + log_error_exit( + env, + f"No session found in LORIS with ID {args.session}.", + lib.exitcode.UPDATE_FAILURE, + ) + + log(env, "Extracting DICOM information... (may take a long time)") + + dicom_summary = get_dicom_study_summary(args.source, args.verbose) + + log(env, "Checking if the DICOM study is already inserted in LORIS...") + + dicom_archive = try_get_dicom_archive_with_study_uid(env.db, dicom_summary.info.study_uid) + + if dicom_archive is not None: + log(env, "Found the DICOM study in LORIS.") + + if args.insert: + log_error_exit( + env, + ( + "Cannot insert the DICOM study since it is already inserted in LORIS. 
Use" + " arguments '--update' and '--overwrite' to update the currently inserted DICOM" + " study.\n" + f"Inserted DICOM study import log:\n{dicom_archive.create_info}" + ), + lib.exitcode.INSERT_FAILURE, + ) + + if dicom_archive is None: + log(env, "Did not find the DICOM study in LORIS.") + + if args.update: + log_error_exit( + env, + ( + "Cannot update the DICOM study since it is not already inserted in LORIS. Use" + " argument '--insert' to insert the DICOM study in LORIS." + ), + lib.exitcode.UPDATE_FAILURE, + ) + + log(env, 'Checking DICOM scan date...') + + dicom_archive_dir_path = args.target + + if dicom_summary.info.scan_date is None: + log_warning(env, "No DICOM scan date found in the DICOM files.") + + if args.year: + log_error_exit( + env, + "Cannot put the DICOM study in a year subdirectory since no scan date was found." + " Remove the argument '--year' to import the study without putting it in a year" + " subdirectory." + ) + + dicom_archive_name = f'DCM_{dicom_study_name}' + else: + log(env, f"Found DICOM scan date: {dicom_summary.info.scan_date}") + + scan_date_string = lib.import_dicom_study.text.write_date(dicom_summary.info.scan_date) + dicom_archive_name = f'DCM_{scan_date_string}_{dicom_study_name}' + + if args.year: + dicom_archive_dir_path = os.path.join(dicom_archive_dir_path, str(dicom_summary.info.scan_date.year)) + + if not os.path.exists(dicom_archive_dir_path): + log(env, f"Creating year directory '{dicom_archive_dir_path}'") + os.mkdir(dicom_archive_dir_path) + + dicom_archive_path = os.path.join(dicom_archive_dir_path, f'{dicom_archive_name}.tar') + + if os.path.exists(dicom_archive_path): + if not args.overwrite: + log_error_exit( + env, + f"File '{dicom_archive_path}' already exists.
Use argument '--overwrite' to overwrite it", + ) + + log_warning(env, f"Overwriting file '{dicom_archive_path}'...") + + os.remove(dicom_archive_path) + + with tempfile.TemporaryDirectory() as tmp_dir_path: + tar_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.tar') + zip_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.tar.gz') + summary_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.meta') + log_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.log') + + log(env, "Copying the DICOM files into a new tar archive...") + + with tarfile.open(tar_path, 'w') as tar: + for file_rel_path in iter_all_files(args.source): + file_path = os.path.join(args.source, file_rel_path) + file_tar_path = os.path.join(os.path.basename(args.source), file_rel_path) + tar.add(file_path, arcname=file_tar_path) + + log(env, "Calculating the tar archive MD5 sum...") + + tar_md5_sum = lib.import_dicom_study.text.make_hash(tar_path, True) + + log(env, "Zipping the tar archive... (may take a long time)") + + with open(tar_path, 'rb') as tar: + # 6 is the default compression level of the `tar` command, Python's + # default is 9, which is more compressed but also a lot slower. 
+ with gzip.open(zip_path, 'wb', compresslevel=6) as zip: + shutil.copyfileobj(tar, zip) + + log(env, "Calculating the zipped tar archive MD5 sum...") + + zip_md5_sum = lib.import_dicom_study.text.make_hash(zip_path, True) + + log(env, "Creating DICOM study import log...") + + dicom_import_log = make_dicom_study_import_log(args.source, dicom_archive_path, tar_md5_sum, zip_md5_sum) + + if args.verbose: + dicom_import_log_string = write_dicom_study_import_log_to_string(dicom_import_log) + log(env, f"The archive will be created with the following arguments:\n{dicom_import_log_string}") + + log(env, "Writing DICOM study summary file...") + + write_dicom_study_summary_to_file(dicom_summary, summary_path) + + log(env, "Writing DICOM study import log file...") + + write_dicom_study_import_log_to_file(dicom_import_log, log_path) + + log(env, 'Copying files into the final DICOM study archive...') + + with tarfile.open(dicom_archive_path, 'w') as tar: + tar.add(zip_path, os.path.basename(zip_path)) + tar.add(summary_path, os.path.basename(summary_path)) + tar.add(log_path, os.path.basename(log_path)) + + log(env, "Calculating final DICOM study archive MD5 sum...") + + dicom_import_log.archive_md5_sum = lib.import_dicom_study.text.make_hash(dicom_import_log.target_path, True) + + archive_location = os.path.relpath(dicom_archive_path, start=args.target) + + if args.insert: + log(env, "Inserting the DICOM study in the LORIS database...") + + insert_dicom_archive(env.db, dicom_summary, dicom_import_log, archive_location) + + if args.update: + log(env, "Updating the DICOM study in the LORIS database...") + + # Safe because we previously checked that the DICOM study is in LORIS. 
+ dicom_archive = cast(DbDicomArchive, dicom_archive) + + update_dicom_archive(env.db, dicom_archive, dicom_summary, dicom_import_log, archive_location) + + if mri_upload is not None: + log(env, "Updating the DICOM study MRI upload...") + + # Safe because we previously checked that the DICOM study is in LORIS. + dicom_archive = cast(DbDicomArchive, dicom_archive) + + dicom_archive.upload = mri_upload + env.db.commit() + + if session is not None: + log(env, "Updating the DICOM study session...") + + # Safe because we previously checked that the DICOM study is in LORIS. + dicom_archive = cast(DbDicomArchive, dicom_archive) + + dicom_archive.session = session + + if mri_upload is not None: + mri_upload.session = session + + env.db.commit() + + log(env, "Success !") + + +if __name__ == '__main__': + main() diff --git a/python/dicom_summary.py b/python/scripts/summarize_dicom_study.py old mode 100644 new mode 100755 similarity index 59% rename from python/dicom_summary.py rename to python/scripts/summarize_dicom_study.py index b1abede34..a0fffbb32 --- a/python/dicom_summary.py +++ b/python/scripts/summarize_dicom_study.py @@ -1,13 +1,12 @@ #!/usr/bin/env python import argparse -from dataclasses import dataclass import sys -import traceback +from dataclasses import dataclass -import lib.dicom.summary_make -import lib.dicom.summary_write import lib.exitcode +from lib.import_dicom_study.summary_get import get_dicom_study_summary +from lib.import_dicom_study.summary_write import write_dicom_study_summary parser = argparse.ArgumentParser(description=( 'Read a DICOM directory and print the DICOM summary of this directory ' @@ -30,20 +29,23 @@ class Args: verbose: bool -def main(): +def main() -> None: parsed_args = parser.parse_args() args = Args(parsed_args.directory, parsed_args.verbose) try: - summary = lib.dicom.summary_make.make(args.directory, args.verbose) + summary = get_dicom_study_summary(args.directory, args.verbose) except Exception as e: - print(f'ERROR: Cannot 
create a summary for the directory \'{args.directory}\'.', file=sys.stderr) - print('Exception message:', file=sys.stderr) - print(e, file=sys.stderr) - traceback.print_exc(file=sys.stderr) + print( + ( + f"ERROR: Cannot create a summary for the directory '{args.directory}'.\n" + f"Exception message:\n{e}" + ), + file=sys.stderr + ) exit(lib.exitcode.INVALID_DICOM) - print(lib.dicom.summary_write.write_to_string(summary)) + print(write_dicom_study_summary(summary)) if __name__ == "__main__": From 7c870d3cc452520aba3d4a1e3b79c5baa764079b Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Fri, 14 Feb 2025 02:52:26 +0000 Subject: [PATCH 03/13] remove --year and --target options --- python/scripts/import_dicom_study.py | 62 ++++++++-------------------- 1 file changed, 18 insertions(+), 44 deletions(-) diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py index fad1e2db1..1e160e252 100755 --- a/python/scripts/import_dicom_study.py +++ b/python/scripts/import_dicom_study.py @@ -11,6 +11,7 @@ import lib.exitcode import lib.import_dicom_study.text from lib.db.models.dicom_archive import DbDicomArchive +from lib.db.queries.config import get_config_with_setting_name from lib.db.queries.dicom_archive import try_get_dicom_archive_with_study_uid from lib.db.queries.mri_upload import try_get_mri_upload_with_id from lib.db.queries.session import try_get_session_with_id @@ -31,20 +32,16 @@ class Args: profile: str source: str - target: str insert: bool update: bool upload: int | None session: int | None overwrite: bool - year: bool verbose: bool def __init__(self, options_dict: dict[str, Any]): self.profile = options_dict['profile']['value'] self.source = os.path.normpath(options_dict['source']['value']) - self.target = os.path.normpath(options_dict['target']['value']) - self.year = options_dict['year']['value'] self.overwrite = options_dict['overwrite']['value'] self.insert = options_dict['insert']['value'] self.update = 
options_dict['update']['value'] @@ -63,18 +60,14 @@ def main() -> None: "directory into a structured and compressed archive, and inserts or uploads the study\n" "into the LORIS database.\n" "\n" - "Usage: import_dicom_study.py -p -s -t ...\n" + "Usage: import_dicom_study.py -p -s ...\n" "\n" "Options: \n" "\t-p, --profile : Name of the LORIS Python configuration file (usually\n" "\t 'database_config.py')\n" "\t-s, --source : Path of the source directory containing the DICOM files of the" "\t study.\n" - "\t-t, --target : Path of the directory in which to create the resulting DICOM\n" - "\t archive\n" "\t --overwrite : Overwrite the DICOM archive file if it already exists.\n" - "\t --year : Create the archive in a year subdirectory according to its scan\n" - "\t date.\n" "\t --insert : Insert the created DICOM archive in the database (requires the archive\n" "\t to not be already inserted).\n" "\t --update : Update the DICOM archive in the database (requires the archive to be\n" @@ -98,12 +91,6 @@ def main() -> None: "source": { "value": None, "required": True, "expect_arg": True, "short_opt": "s", "is_path": True, }, - "target": { - "value": None, "required": True, "expect_arg": True, "short_opt": "t", "is_path": True, - }, - "year": { - "value": False, "required": False, "expect_arg": False, "short_opt": "year", "is_path": False, - }, "overwrite": { "value": False, "required": False, "expect_arg": False, "short_opt": "overwrite", "is_path": False, }, @@ -142,13 +129,6 @@ def main() -> None: lib.exitcode.INVALID_ARG, ) - if not os.path.isdir(args.target) or not os.access(args.target, os.W_OK): - log_error_exit( - env, - "Argument '--target' must be a writable directory path.", - lib.exitcode.INVALID_ARG, - ) - if args.insert and args.update: log_error_exit( env, @@ -227,34 +207,30 @@ def main() -> None: log(env, 'Checking DICOM scan date...') - dicom_archive_dir_path = args.target + # TODO: Factorize this into a `lib.config` module and add some checks (directory 
exists, permissions). + dicom_archive_dir_path = get_config_with_setting_name(env.db, 'tarchiveLibraryDir').value + if dicom_archive_dir_path is None: + log_error_exit(env, "No value found for configuration setting 'tarchiveLibraryDir'.") if dicom_summary.info.scan_date is None: log_warning(env, "No DICOM scan date found in the DICOM files.") - if args.year: - log_error_exit( - env, - "Cannot put the DICOM study in a year subdirectory since no scan date was found." - " Remove the argument '--year' to import the study without putting it in a year" - " subdirectory." - ) - - dicom_archive_name = f'DCM_{dicom_study_name}' + dicom_archive_rel_path = f'DCM_{dicom_study_name}.tar' else: log(env, f"Found DICOM scan date: {dicom_summary.info.scan_date}") scan_date_string = lib.import_dicom_study.text.write_date(dicom_summary.info.scan_date) - dicom_archive_name = f'DCM_{scan_date_string}_{dicom_study_name}' - - if args.year: - dicom_archive_dir_path = os.path.join(dicom_archive_dir_path, str(dicom_summary.info.scan_date.year)) + dicom_archive_rel_path = os.path.join( + str(dicom_summary.info.scan_date.year), + f'DCM_{scan_date_string}_{dicom_study_name}.tar', + ) - if not os.path.exists(dicom_archive_dir_path): - log(env, f"Creating year directory '{dicom_archive_dir_path}'") - os.mkdir(dicom_archive_dir_path) + dicom_archive_year_dir_path = os.path.join(dicom_archive_dir_path, str(dicom_summary.info.scan_date.year)) + if not os.path.exists(dicom_archive_year_dir_path): + log(env, f"Creating year directory '{dicom_archive_year_dir_path}'...") + os.mkdir(dicom_archive_year_dir_path) - dicom_archive_path = os.path.join(dicom_archive_dir_path, f'{dicom_archive_name}.tar') + dicom_archive_path = os.path.join(dicom_archive_dir_path, dicom_archive_rel_path) if os.path.exists(dicom_archive_path): if not args.overwrite: @@ -324,12 +300,10 @@ def main() -> None: dicom_import_log.archive_md5_sum = lib.import_dicom_study.text.make_hash(dicom_import_log.target_path, True) - 
archive_location = os.path.relpath(dicom_archive_path, start=args.target) - if args.insert: log(env, "Inserting the DICOM study in the LORIS database...") - insert_dicom_archive(env.db, dicom_summary, dicom_import_log, archive_location) + insert_dicom_archive(env.db, dicom_summary, dicom_import_log, dicom_archive_rel_path) if args.update: log(env, "Updating the DICOM study in the LORIS database...") @@ -337,7 +311,7 @@ def main() -> None: # Safe because we previously checked that the DICOM study is in LORIS. dicom_archive = cast(DbDicomArchive, dicom_archive) - update_dicom_archive(env.db, dicom_archive, dicom_summary, dicom_import_log, archive_location) + update_dicom_archive(env.db, dicom_archive, dicom_summary, dicom_import_log, dicom_archive_rel_path) if mri_upload is not None: log(env, "Updating the DICOM study MRI upload...") From 47f5cc95c7b2f6fa6c2c274749c1a1ba96040747 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Fri, 14 Feb 2025 16:42:23 +0000 Subject: [PATCH 04/13] get session from config file --- python/lib/db/queries/session.py | 10 --------- python/scripts/import_dicom_study.py | 33 +++++++++++++++------------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/python/lib/db/queries/session.py b/python/lib/db/queries/session.py index e0d45086b..2e2b8b4b2 100644 --- a/python/lib/db/queries/session.py +++ b/python/lib/db/queries/session.py @@ -17,13 +17,3 @@ def try_get_session_with_cand_id_visit_label(db: Database, cand_id: int, visit_l .where(DbSession.visit_label == visit_label) .where(DbCandidate.cand_id == cand_id) ).scalar_one_or_none() - - -def try_get_session_with_id(db: Database, session_id: int): - """ - Get a session from the database using its ID, or return `None` if no session is found. 
- """ - - return db.execute(select(DbSession) - .where(DbSession.id == session_id) - ).scalar_one_or_none() diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py index 1e160e252..887941fa4 100755 --- a/python/scripts/import_dicom_study.py +++ b/python/scripts/import_dicom_study.py @@ -14,7 +14,7 @@ from lib.db.queries.config import get_config_with_setting_name from lib.db.queries.dicom_archive import try_get_dicom_archive_with_study_uid from lib.db.queries.mri_upload import try_get_mri_upload_with_id -from lib.db.queries.session import try_get_session_with_id +from lib.get_subject_session import get_subject_session from lib.import_dicom_study.dicom_database import insert_dicom_archive, update_dicom_archive from lib.import_dicom_study.import_log import ( make_dicom_study_import_log, @@ -27,6 +27,7 @@ from lib.lorisgetopt import LorisGetOpt from lib.make_env import make_env from lib.util import iter_all_files +from lib.validate_subject_info import validate_subject_info class Args: @@ -35,7 +36,7 @@ class Args: insert: bool update: bool upload: int | None - session: int | None + session: bool overwrite: bool verbose: bool @@ -73,7 +74,8 @@ def main() -> None: "\t --update : Update the DICOM archive in the database (requires the archive to be\n" "\t already be inserted), generally used with '--overwrite'.\n" "\t --upload : Associate the DICOM study with an existing MRI upload.\n" - "\t --session : Associate the DICOM study with an existing session.\n" + "\t --session : Associate the DICOM study with an existing session using the LORIS-MRI\n" + "\t Python configuration.\n" "\t-v, --verbose : If set, be verbose\n" "\n" "Required options: \n" @@ -104,7 +106,7 @@ def main() -> None: "value": None, "required": False, "expect_arg": True, "short_opt": "upload", "is_path": False, }, "session": { - "value": None, "required": False, "expect_arg": True, "short_opt": "session", "is_path": False, + "value": False, "required": False, "expect_arg": 
False, "short_opt": "session", "is_path": False, }, "verbose": { "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False @@ -136,7 +138,7 @@ def main() -> None: lib.exitcode.INVALID_ARG, ) - if (args.session is not None or args.upload is not None) and not (args.insert or args.update): + if (args.session or args.upload is not None) and not (args.insert or args.update): log_error_exit( env, "Argument '--insert' or '--update' must be used when '--upload' or '--session' is used.", @@ -159,16 +161,6 @@ def main() -> None: lib.exitcode.UPDATE_FAILURE, ) - session = None - if args.session is not None: - session = try_get_session_with_id(env.db, args.session) - if session is None: - log_error_exit( - env, - f"No session found in LORIS with ID {args.session}.", - lib.exitcode.UPDATE_FAILURE, - ) - log(env, "Extracting DICOM information... (may take a long time)") dicom_summary = get_dicom_study_summary(args.source, args.verbose) @@ -205,6 +197,17 @@ def main() -> None: lib.exitcode.UPDATE_FAILURE, ) + session = None + if args.session: + # TODO: Factorize this code into a streamlined way to get the session from the configuration. + subject_info = loris_getopt_obj.config_info.get_subject_info( # type: ignore + loris_getopt_obj.db, + str(dicom_summary.info.patient.name) + ) + + validate_subject_info(env.db, subject_info) + session = get_subject_session(env, subject_info) + log(env, 'Checking DICOM scan date...') # TODO: Factorize this into a `lib.config` module and add some checks (directory exists, permissions). 
From 2031e80f9a563601c9bb19b77a32cfed8bc28b46 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sat, 15 Feb 2025 05:45:51 +0000 Subject: [PATCH 05/13] remove unused upload argument --- python/scripts/import_dicom_study.py | 37 ++-------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py index 887941fa4..656967c52 100755 --- a/python/scripts/import_dicom_study.py +++ b/python/scripts/import_dicom_study.py @@ -13,7 +13,6 @@ from lib.db.models.dicom_archive import DbDicomArchive from lib.db.queries.config import get_config_with_setting_name from lib.db.queries.dicom_archive import try_get_dicom_archive_with_study_uid -from lib.db.queries.mri_upload import try_get_mri_upload_with_id from lib.get_subject_session import get_subject_session from lib.import_dicom_study.dicom_database import insert_dicom_archive, update_dicom_archive from lib.import_dicom_study.import_log import ( @@ -35,7 +34,6 @@ class Args: source: str insert: bool update: bool - upload: int | None session: bool overwrite: bool verbose: bool @@ -46,7 +44,6 @@ def __init__(self, options_dict: dict[str, Any]): self.overwrite = options_dict['overwrite']['value'] self.insert = options_dict['insert']['value'] self.update = options_dict['update']['value'] - self.upload = options_dict['upload']['value'] self.session = options_dict['session']['value'] self.verbose = options_dict['verbose']['value'] @@ -73,7 +70,6 @@ def main() -> None: "\t to not be already inserted).\n" "\t --update : Update the DICOM archive in the database (requires the archive to be\n" "\t already be inserted), generally used with '--overwrite'.\n" - "\t --upload : Associate the DICOM study with an existing MRI upload.\n" "\t --session : Associate the DICOM study with an existing session using the LORIS-MRI\n" "\t Python configuration.\n" "\t-v, --verbose : If set, be verbose\n" @@ -102,9 +98,6 @@ def main() -> None: "update": { 
"value": False, "required": False, "expect_arg": False, "short_opt": "update", "is_path": False, }, - "upload": { - "value": None, "required": False, "expect_arg": True, "short_opt": "upload", "is_path": False, - }, "session": { "value": False, "required": False, "expect_arg": False, "short_opt": "session", "is_path": False, }, @@ -138,10 +131,10 @@ def main() -> None: lib.exitcode.INVALID_ARG, ) - if (args.session or args.upload is not None) and not (args.insert or args.update): + if args.session and not (args.insert or args.update): log_error_exit( env, - "Argument '--insert' or '--update' must be used when '--upload' or '--session' is used.", + "Argument '--insert' or '--update' must be used when '--session' is used.", lib.exitcode.INVALID_ARG, ) @@ -149,18 +142,6 @@ def main() -> None: dicom_study_name = os.path.basename(args.source) - # Check MRI upload. - - mri_upload = None - if args.upload is not None: - mri_upload = try_get_mri_upload_with_id(env.db, args.upload) - if mri_upload is None: - log_error_exit( - env, - f"No MRI upload found in LORIS with ID {args.upload}.", - lib.exitcode.UPDATE_FAILURE, - ) - log(env, "Extracting DICOM information... (may take a long time)") dicom_summary = get_dicom_study_summary(args.source, args.verbose) @@ -316,26 +297,12 @@ def main() -> None: update_dicom_archive(env.db, dicom_archive, dicom_summary, dicom_import_log, dicom_archive_rel_path) - if mri_upload is not None: - log(env, "Updating the DICOM study MRI upload...") - - # Safe because we previously checked that the DICOM study is in LORIS. - dicom_archive = cast(DbDicomArchive, dicom_archive) - - dicom_archive.upload = mri_upload - env.db.commit() - if session is not None: log(env, "Updating the DICOM study session...") # Safe because we previously checked that the DICOM study is in LORIS. 
dicom_archive = cast(DbDicomArchive, dicom_archive) - dicom_archive.session = session - - if mri_upload is not None: - mri_upload.session = session - env.db.commit() log(env, "Success !") From b34e515b6b8c234a7c0e724123fd5f2748ac10df Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Wed, 26 Feb 2025 13:07:37 +0000 Subject: [PATCH 06/13] fix little oopsie for --session --- python/scripts/import_dicom_study.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py index 656967c52..fae12cc1f 100755 --- a/python/scripts/import_dicom_study.py +++ b/python/scripts/import_dicom_study.py @@ -287,7 +287,7 @@ def main() -> None: if args.insert: log(env, "Inserting the DICOM study in the LORIS database...") - insert_dicom_archive(env.db, dicom_summary, dicom_import_log, dicom_archive_rel_path) + dicom_archive = insert_dicom_archive(env.db, dicom_summary, dicom_import_log, dicom_archive_rel_path) if args.update: log(env, "Updating the DICOM study in the LORIS database...") From d3bf8209d37f1e3a3d80a8f70e66b012bd280312 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Sun, 2 Mar 2025 16:26:29 +0000 Subject: [PATCH 07/13] handle large files hash --- python/lib/import_dicom_study/text.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/lib/import_dicom_study/text.py b/python/lib/import_dicom_study/text.py index 2296ed541..a317ef52b 100644 --- a/python/lib/import_dicom_study/text.py +++ b/python/lib/import_dicom_study/text.py @@ -70,8 +70,13 @@ def make_hash(path: str, with_name: bool = False): Get the MD5 sum hash of a file, with or without the filename appended. """ + # Since the file given to this function can be up to several gigabytes, we read it in chunks to + # avoid running out of memory. 
+ hash = hashlib.md5() with open(path, 'rb') as file: - hash = hashlib.md5(file.read()).hexdigest() + while chunk := file.read(1048576): + hash.update(chunk) + hash = hash.hexdigest() if with_name: hash = f'{hash} {os.path.basename(path)}' From 4a8cd3fa59110c6f72b0c52608e48337315dc53a Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Tue, 4 Mar 2025 06:00:24 +0000 Subject: [PATCH 08/13] fix unhandled modalities --- python/lib/import_dicom_study/summary_get.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py index b3233629c..bf04fe20d 100644 --- a/python/lib/import_dicom_study/summary_get.py +++ b/python/lib/import_dicom_study/summary_get.py @@ -40,6 +40,15 @@ def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): if study_info is None: study_info = get_dicom_study_info(dicom) + modality = read_value_none(dicom, 'Modality') + if modality is None: + print(f"Found no modality for DICOM file '{file_rel_path}'.") + raise pydicom.errors.InvalidDicomError + + if modality != 'MR' and modality != 'PT': + print(f"Found unhandled modality '{modality}' for DICOM file '{file_rel_path}'.") + raise pydicom.errors.InvalidDicomError + dicom_files.append(get_dicom_file_info(dicom)) acquisition_key = DicomStudyAcquisitionKey( From 4fbe555e757ec203010edee0f105a924e4f7f1ca Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Fri, 7 Mar 2025 14:47:36 +0000 Subject: [PATCH 09/13] use new lib.util module --- pyproject.toml | 1 - python/lib/import_dicom_study/summary_get.py | 11 ++++++----- python/lib/import_dicom_study/text.py | 20 +++++--------------- python/lib/util.py | 14 -------------- python/scripts/import_dicom_study.py | 12 +++++++----- 5 files changed, 18 insertions(+), 40 deletions(-) delete mode 100644 python/lib/util.py diff --git a/pyproject.toml b/pyproject.toml index 84fdf2887..cfbc1e2a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ 
include = [ "python/lib/make_env.py", "python/lib/scanner.py", "python/lib/validate_subject_info.py", - "python/lib/util.py", "python/scripts/import_dicom_study.py", "python/scripts/summarize_dicom_study.py", ] diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py index bf04fe20d..c554e4d0a 100644 --- a/python/lib/import_dicom_study/summary_get.py +++ b/python/lib/import_dicom_study/summary_get.py @@ -14,8 +14,9 @@ DicomStudyScanner, DicomStudySummary, ) -from lib.import_dicom_study.text import make_hash, read_dicom_date_none -from lib.util import iter_all_files +from lib.import_dicom_study.text import read_dicom_date_none +from lib.util.crypto import compute_file_md5_hash +from lib.util.fs import iter_all_dir_files def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): @@ -28,7 +29,7 @@ def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): other_files: list[DicomStudyOtherFile] = [] acquisitions_dict: dict[DicomStudyAcquisitionKey, DicomStudyAcquisition] = dict() - file_rel_paths = list(iter_all_files(dicom_study_dir_path)) + file_rel_paths = list(iter_all_dir_files(dicom_study_dir_path)) for i, file_rel_path in enumerate(file_rel_paths, start=1): if verbose: print(f"Processing file '{file_rel_path}' ({i}/{len(file_rel_paths)})") @@ -114,7 +115,7 @@ def get_dicom_file_info(dicom: pydicom.Dataset) -> DicomStudyDicomFile: return DicomStudyDicomFile( os.path.basename(dicom.filename), - make_hash(dicom.filename), + compute_file_md5_hash(dicom.filename), read_value_none(dicom, 'SeriesNumber'), read_value_none(dicom, 'SeriesInstanceUID'), read_value_none(dicom, 'SeriesDescription'), @@ -132,7 +133,7 @@ def get_other_file_info(file_path: str) -> DicomStudyOtherFile: return DicomStudyOtherFile( os.path.basename(file_path), - make_hash(file_path), + compute_file_md5_hash(file_path), ) diff --git a/python/lib/import_dicom_study/text.py b/python/lib/import_dicom_study/text.py index 
a317ef52b..b5f3004fe 100644 --- a/python/lib/import_dicom_study/text.py +++ b/python/lib/import_dicom_study/text.py @@ -3,10 +3,11 @@ different types of values. """ -import hashlib import os from datetime import date, datetime +from lib.util.crypto import compute_file_md5_hash + def write_value(value: str | int | float | None): if value is None: @@ -65,20 +66,9 @@ def read_float_none(string: str | None): return float(string) -def make_hash(path: str, with_name: bool = False): +def compute_md5_hash_with_name(path: str): """ - Get the MD5 sum hash of a file, with or without the filename appended. + Get the MD5 sum hash of a file with the filename appended. """ - # Since the file given to this function can be up to several gigabytes, we read it in chunks to - # avoid running out of memory. - hash = hashlib.md5() - with open(path, 'rb') as file: - while chunk := file.read(1048576): - hash.update(chunk) - hash = hash.hexdigest() - - if with_name: - hash = f'{hash} {os.path.basename(path)}' - - return hash + return f'{compute_file_md5_hash(path)} {os.path.basename(path)}' diff --git a/python/lib/util.py b/python/lib/util.py deleted file mode 100644 index 296423e02..000000000 --- a/python/lib/util.py +++ /dev/null @@ -1,14 +0,0 @@ -import os -from collections.abc import Generator - - -def iter_all_files(dir_path: str) -> Generator[str, None, None]: - """ - Iterate through all the files in a directory recursively, and yield the path of each file - relative to that directory. 
- """ - - for sub_dir_path, _, file_names in os.walk(dir_path): - for file_name in file_names: - file_path = os.path.join(sub_dir_path, file_name) - yield os.path.relpath(file_path, start=dir_path) diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py index fae12cc1f..492ab56f7 100755 --- a/python/scripts/import_dicom_study.py +++ b/python/scripts/import_dicom_study.py @@ -25,7 +25,7 @@ from lib.logging import log, log_error_exit, log_warning from lib.lorisgetopt import LorisGetOpt from lib.make_env import make_env -from lib.util import iter_all_files +from lib.util.fs import iter_all_dir_files from lib.validate_subject_info import validate_subject_info @@ -236,14 +236,14 @@ def main() -> None: log(env, "Copying the DICOM files into a new tar archive...") with tarfile.open(tar_path, 'w') as tar: - for file_rel_path in iter_all_files(args.source): + for file_rel_path in iter_all_dir_files(args.source): file_path = os.path.join(args.source, file_rel_path) file_tar_path = os.path.join(os.path.basename(args.source), file_rel_path) tar.add(file_path, arcname=file_tar_path) log(env, "Calculating the tar archive MD5 sum...") - tar_md5_sum = lib.import_dicom_study.text.make_hash(tar_path, True) + tar_md5_sum = lib.import_dicom_study.text.compute_md5_hash_with_name(tar_path) log(env, "Zipping the tar archive... 
(may take a long time)") @@ -255,7 +255,7 @@ def main() -> None: log(env, "Calculating the zipped tar archive MD5 sum...") - zip_md5_sum = lib.import_dicom_study.text.make_hash(zip_path, True) + zip_md5_sum = lib.import_dicom_study.text.compute_md5_hash_with_name(zip_path) log(env, "Creating DICOM study import log...") @@ -282,7 +282,9 @@ def main() -> None: log(env, "Calculating final DICOM study archive MD5 sum...") - dicom_import_log.archive_md5_sum = lib.import_dicom_study.text.make_hash(dicom_import_log.target_path, True) + dicom_import_log.archive_md5_sum = lib.import_dicom_study.text.compute_md5_hash_with_name( + dicom_import_log.target_path + ) if args.insert: log(env, "Inserting the DICOM study in the LORIS database...") From 62fc93df6341fb984e028558d59afc5ef54e6bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9cile=20Madjar?= Date: Sat, 8 Mar 2025 02:01:44 -0500 Subject: [PATCH 10/13] Add support to be able to read headers from enhanced DICOMs (#7) * print * print * print * print * add support for enhanced DICOMs * add support for enhanced DICOMs --- python/lib/import_dicom_study/summary_get.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py index c554e4d0a..150b2ea22 100644 --- a/python/lib/import_dicom_study/summary_get.py +++ b/python/lib/import_dicom_study/summary_get.py @@ -178,6 +178,10 @@ def read_value_none(dicom: pydicom.Dataset, tag: str): """ if tag not in dicom: + for elem in dicom.iterall(): + # to find header information in enhanced DICOMs, need to look into subheaders + if elem.tag == tag: + return elem.value return None return dicom[tag].value or None From ecfcd6da2334770f8a5c91ef92743e7e0675dc20 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Mon, 10 Mar 2025 11:01:32 +0000 Subject: [PATCH 11/13] rework associate files with series --- python/lib/db/queries/dicom_archive.py | 21 ---- .../lib/import_dicom_study/dicom_database.py | 69 
+++++------ python/lib/import_dicom_study/summary_get.py | 96 ++------------- python/lib/import_dicom_study/summary_type.py | 30 +++-- .../lib/import_dicom_study/summary_write.py | 110 +++++++++++++++--- python/lib/util/iter.py | 35 +++++- .../tests/unit/db/query/test_dicom_archive.py | 18 +-- 7 files changed, 182 insertions(+), 197 deletions(-) diff --git a/python/lib/db/queries/dicom_archive.py b/python/lib/db/queries/dicom_archive.py index 18ddc29cb..f58b6fdb6 100644 --- a/python/lib/db/queries/dicom_archive.py +++ b/python/lib/db/queries/dicom_archive.py @@ -52,27 +52,6 @@ def delete_dicom_archive_file_series(db: Database, dicom_archive: DbDicomArchive .where(DbDicomArchiveSeries.archive_id == dicom_archive.id)) -def get_dicom_archive_series_with_file_info( - db: Database, - series_uid: str, - series_number: int, - echo_time: float | None, - sequence_name: str | None, -): - """ - Get a DICOM archive series from the database using its file information, or raise an exception - if no DICOM archive series is found. 
- """ - - query = select(DbDicomArchiveSeries) \ - .where(DbDicomArchiveSeries.series_uid == series_uid) \ - .where(DbDicomArchiveSeries.series_number == series_number) \ - .where(DbDicomArchiveSeries.echo_time == echo_time) \ - .where(DbDicomArchiveSeries.sequence_name == sequence_name) - - return db.execute(query).scalar_one() - - def try_get_dicom_archive_series_with_series_uid_echo_time( db: Database, series_uid: str, diff --git a/python/lib/import_dicom_study/dicom_database.py b/python/lib/import_dicom_study/dicom_database.py index d0a671495..feaee82b5 100644 --- a/python/lib/import_dicom_study/dicom_database.py +++ b/python/lib/import_dicom_study/dicom_database.py @@ -5,10 +5,11 @@ from lib.db.models.dicom_archive import DbDicomArchive from lib.db.models.dicom_archive_file import DbDicomArchiveFile from lib.db.models.dicom_archive_series import DbDicomArchiveSeries -from lib.db.queries.dicom_archive import delete_dicom_archive_file_series, get_dicom_archive_series_with_file_info +from lib.db.queries.dicom_archive import delete_dicom_archive_file_series from lib.import_dicom_study.import_log import DicomStudyImportLog, write_dicom_study_import_log_to_string from lib.import_dicom_study.summary_type import DicomStudySummary from lib.import_dicom_study.summary_write import write_dicom_study_summary +from lib.util.iter import count, flatten def insert_dicom_archive( @@ -73,8 +74,8 @@ def populate_dicom_archive( dicom_archive.last_update = None dicom_archive.date_acquired = dicom_summary.info.scan_date dicom_archive.date_last_archived = datetime.now() - dicom_archive.acquisition_count = len(dicom_summary.acquisitions) - dicom_archive.dicom_file_count = len(dicom_summary.dicom_files) + dicom_archive.acquisition_count = len(dicom_summary.dicom_series_files) + dicom_archive.dicom_file_count = count(flatten(dicom_summary.dicom_series_files.values())) dicom_archive.non_dicom_file_count = len(dicom_summary.other_files) dicom_archive.md5_sum_dicom_only = 
dicom_import_log.tarball_md5_sum dicom_archive.md5_sum_archive = dicom_import_log.archive_md5_sum @@ -100,42 +101,36 @@ def insert_files_series(db: Database, dicom_archive: DbDicomArchive, dicom_summa Insert the DICOM files and series related to a DICOM archive in the database. """ - for acquisition in dicom_summary.acquisitions: - db.add(DbDicomArchiveSeries( + for dicom_series, dicom_files in dicom_summary.dicom_series_files.items(): + dicom_series = DbDicomArchiveSeries( archive_id = dicom_archive.id, - series_number = acquisition.series_number, - series_description = acquisition.series_description, - sequence_name = acquisition.sequence_name, - echo_time = acquisition.echo_time, - repetition_time = acquisition.repetition_time, - inversion_time = acquisition.inversion_time, - slice_thickness = acquisition.slice_thickness, - phase_encoding = acquisition.phase_encoding, - number_of_files = acquisition.number_of_files, - series_uid = acquisition.series_uid, - modality = acquisition.modality, - )) - - db.commit() - - for dicom_file in dicom_summary.dicom_files: - dicom_series = get_dicom_archive_series_with_file_info( - db, - dicom_file.series_uid or '', - dicom_file.series_number or 1, - dicom_file.echo_time, - dicom_file.sequence_name, + series_number = dicom_series.series_number, + series_description = dicom_series.series_description, + sequence_name = dicom_series.sequence_name, + echo_time = dicom_series.echo_time, + repetition_time = dicom_series.repetition_time, + inversion_time = dicom_series.inversion_time, + slice_thickness = dicom_series.slice_thickness, + phase_encoding = dicom_series.phase_encoding, + number_of_files = len(dicom_files), + series_uid = dicom_series.series_uid, + modality = dicom_series.modality, ) - db.add(DbDicomArchiveFile( - archive_id = dicom_archive.id, - series_number = dicom_file.series_number, - file_number = dicom_file.file_number, - echo_number = dicom_file.echo_number, - series_description = dicom_file.series_description, - 
md5_sum = dicom_file.md5_sum, - file_name = dicom_file.file_name, - series_id = dicom_series.id, - )) + # Populate the DICOM series ID. + db.add(dicom_series) + db.commit() + + for dicom_file in dicom_files: + db.add(DbDicomArchiveFile( + archive_id = dicom_archive.id, + series_number = dicom_file.series_number, + file_number = dicom_file.file_number, + echo_number = dicom_file.echo_number, + series_description = dicom_file.series_description, + md5_sum = dicom_file.md5_sum, + file_name = dicom_file.file_name, + series_id = dicom_series.id, + )) db.commit() diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py index 150b2ea22..24352f34b 100644 --- a/python/lib/import_dicom_study/summary_get.py +++ b/python/lib/import_dicom_study/summary_get.py @@ -1,13 +1,11 @@ import os -from functools import cmp_to_key import pydicom import pydicom.errors from lib.import_dicom_study.summary_type import ( - DicomStudyAcquisition, - DicomStudyAcquisitionKey, DicomStudyDicomFile, + DicomStudyDicomSeries, DicomStudyInfo, DicomStudyOtherFile, DicomStudyPatient, @@ -25,9 +23,8 @@ def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): """ study_info = None - dicom_files: list[DicomStudyDicomFile] = [] + dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]] = {} other_files: list[DicomStudyOtherFile] = [] - acquisitions_dict: dict[DicomStudyAcquisitionKey, DicomStudyAcquisition] = dict() file_rel_paths = list(iter_all_dir_files(dicom_study_dir_path)) for i, file_rel_path in enumerate(file_rel_paths, start=1): @@ -50,30 +47,19 @@ def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): print(f"Found unhandled modality '{modality}' for DICOM file '{file_rel_path}'.") raise pydicom.errors.InvalidDicomError - dicom_files.append(get_dicom_file_info(dicom)) + dicom_series = get_dicom_series_info(dicom) + if dicom_series not in dicom_series_files: + dicom_series_files[dicom_series] = [] - 
acquisition_key = DicomStudyAcquisitionKey( - series_number = dicom.SeriesNumber, - echo_numbers = read_value_none(dicom, 'EchoNumbers'), - sequence_name = read_value_none(dicom, 'SequenceName'), - ) - - if acquisition_key not in acquisitions_dict: - acquisitions_dict[acquisition_key] = get_acquisition_info(dicom) - - acquisitions_dict[acquisition_key].number_of_files += 1 + dicom_file = get_dicom_file_info(dicom) + dicom_series_files[dicom_series].append(dicom_file) except pydicom.errors.InvalidDicomError: other_files.append(get_other_file_info(file_path)) if study_info is None: raise Exception("Found no DICOM file in the DICOM study directory.") - acquisitions = list(acquisitions_dict.values()) - - dicom_files.sort(key=cmp_to_key(compare_dicom_files)) - acquisitions.sort(key=cmp_to_key(compare_acquisitions)) - - return DicomStudySummary(study_info, acquisitions, dicom_files, other_files) + return DicomStudySummary(study_info, dicom_series_files, other_files) def get_dicom_study_info(dicom: pydicom.Dataset) -> DicomStudyInfo: @@ -137,12 +123,12 @@ def get_other_file_info(file_path: str) -> DicomStudyOtherFile: ) -def get_acquisition_info(dicom: pydicom.Dataset): +def get_dicom_series_info(dicom: pydicom.Dataset): """ - Get information about an acquisition within a DICOM study. + Get information about a DICOM series within a DICOM study. 
""" - return DicomStudyAcquisition( + return DicomStudyDicomSeries( read_value(dicom, 'SeriesNumber'), read_value_none(dicom, 'SeriesInstanceUID'), read_value_none(dicom, 'SeriesDescription'), @@ -152,7 +138,6 @@ def get_acquisition_info(dicom: pydicom.Dataset): read_value_none(dicom, 'InversionTime'), read_value_none(dicom, 'SliceThickness'), read_value_none(dicom, 'InPlanePhaseEncodingDirection'), - 0, read_value_none(dicom, 'Modality'), ) @@ -185,62 +170,3 @@ def read_value_none(dicom: pydicom.Dataset, tag: str): return None return dicom[tag].value or None - - -# Comparison functions used to sort the various DICOM study information objects. - -def compare_dicom_files(a: DicomStudyDicomFile, b: DicomStudyDicomFile): - """ - Compare two DICOM file informations in accordance with `functools.cmp_to_key`. - """ - - return \ - compare_int_none(a.series_number, b.series_number) or \ - compare_int_none(a.file_number, b.file_number) or \ - compare_int_none(a.echo_number, b.echo_number) - - -def compare_acquisitions(a: DicomStudyAcquisition, b: DicomStudyAcquisition): - """ - Compare two acquisition informations in accordance with `functools.cmp_to_key`. - """ - - return \ - a.series_number - b.series_number or \ - compare_string_none(a.sequence_name, b.sequence_name) - - -def compare_int_none(a: int | None, b: int | None): - """ - Compare two nullable integers in accordance with `functools.cmp_to_key`. - """ - - match a, b: - case None, None: - return 0 - case _, None: - return -1 - case None, _: - return 1 - case a, b: - return a - b - - -def compare_string_none(a: str | None, b: str | None): - """ - Compare two nullable strings in accordance with `functools.cmp_to_key`. 
- """ - - match a, b: - case None, None: - return 0 - case _, None: - return -1 - case None, _: - return 1 - case a, b if a < b: - return -1 - case a, b if a > b: - return 1 - case a, b: - return 0 diff --git a/python/lib/import_dicom_study/summary_type.py b/python/lib/import_dicom_study/summary_type.py index 6deeec51c..e54600767 100644 --- a/python/lib/import_dicom_study/summary_type.py +++ b/python/lib/import_dicom_study/summary_type.py @@ -67,10 +67,19 @@ class DicomStudyOtherFile: md5_sum: str -@dataclass -class DicomStudyAcquisition: +# This dataclass does not correspond to a "real" DICOM series, as a DICOM series may actually have +# files that have different echo times, inversion times, repetition times... (for instance in +# multi-echo series). +# Generally, a "real" DICOM series should be uniquely identifiable by using the series instance UID +# DICOM attribute. +# This class corresponds more to a LORIS database DICOM series entry, which is a unique tuple of +# some parameters of the DICOM files of a study (including the DICOM series instance UID). As such, +# there is a 1-to-n relationship between a "real" DICOM series, and the LORIS database DICOM series +# entries. +@dataclass(frozen=True) +class DicomStudyDicomSeries: """ - Information about an acquisition within a DICOM study. + Information about an DICOM series within a DICOM study. """ series_number: int @@ -82,21 +91,9 @@ class DicomStudyAcquisition: inversion_time: float | None # In Milliseconds slice_thickness: float | None # In Millimeters phase_encoding: str | None - number_of_files: int modality: str | None -@dataclass(frozen=True) -class DicomStudyAcquisitionKey: - """ - Identifying information about an acquisition within a DICOM study. 
- """ - - series_number: int - echo_numbers: str | None - sequence_name: str | None - - @dataclass class DicomStudySummary: """ @@ -104,6 +101,5 @@ class DicomStudySummary: """ info: DicomStudyInfo - acquisitions: list[DicomStudyAcquisition] - dicom_files: list[DicomStudyDicomFile] + dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]] other_files: list[DicomStudyOtherFile] diff --git a/python/lib/import_dicom_study/summary_write.py b/python/lib/import_dicom_study/summary_write.py index f4b2d575e..aad415ebd 100644 --- a/python/lib/import_dicom_study/summary_write.py +++ b/python/lib/import_dicom_study/summary_write.py @@ -1,8 +1,9 @@ import xml.etree.ElementTree as ET +from functools import cmp_to_key from lib.import_dicom_study.summary_type import ( - DicomStudyAcquisition, DicomStudyDicomFile, + DicomStudyDicomSeries, DicomStudyInfo, DicomStudyOtherFile, DicomStudySummary, @@ -10,6 +11,7 @@ from lib.import_dicom_study.text import write_date_none from lib.import_dicom_study.text_dict import DictWriter from lib.import_dicom_study.text_table import TableWriter +from lib.util.iter import count, flatten def write_dicom_study_summary_to_file(dicom_summary: DicomStudySummary, filename: str): @@ -29,9 +31,9 @@ def write_dicom_study_summary(dicom_summary: DicomStudySummary) -> str: xml = ET.Element('STUDY') ET.SubElement(xml, 'STUDY_INFO').text = write_dicom_study_info(dicom_summary.info) - ET.SubElement(xml, 'FILES').text = write_dicom_study_dicom_files(dicom_summary.dicom_files) + ET.SubElement(xml, 'FILES').text = write_dicom_study_dicom_files(dicom_summary.dicom_series_files) ET.SubElement(xml, 'OTHERS').text = write_dicom_study_other_files(dicom_summary.other_files) - ET.SubElement(xml, 'ACQUISITIONS').text = write_dicom_study_acquisitions(dicom_summary.acquisitions) + ET.SubElement(xml, 'ACQUISITIONS').text = write_dicom_study_dicom_series(dicom_summary.dicom_series_files) ET.SubElement(xml, 'SUMMARY').text = 
write_dicom_study_ending(dicom_summary) ET.indent(xml, space='') return ET.tostring(xml, encoding='unicode') + '\n' @@ -58,11 +60,14 @@ def write_dicom_study_info(info: DicomStudyInfo) -> str: ]).write() -def write_dicom_study_dicom_files(dicom_files: list[DicomStudyDicomFile]) -> str: +def write_dicom_study_dicom_files(dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]]) -> str: """ Serialize information about the DICOM files of a DICOM study into a table. """ + dicom_files = list(flatten(dicom_series_files.values())) + dicom_files.sort(key=cmp_to_key(compare_dicom_files)) + writer = TableWriter() writer.append_row(['SN', 'FN', 'EN', 'Series', 'md5sum', 'File name']) for dicom_file in dicom_files: @@ -94,11 +99,14 @@ def write_dicom_study_other_files(other_files: list[DicomStudyOtherFile]) -> str return '\n' + writer.write() -def write_dicom_study_acquisitions(acquisitions: list[DicomStudyAcquisition]) -> str: +def write_dicom_study_dicom_series(dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]]) -> str: """ - Serialize information about the acquisitions of a DICOM study into a table. + Serialize information about the DICOM series of a DICOM study into a table. 
""" + dicom_series_list = list(dicom_series_files.keys()) + dicom_series_list.sort(key=cmp_to_key(compare_dicom_series)) + writer = TableWriter() writer.append_row([ 'Series (SN)', @@ -114,19 +122,21 @@ def write_dicom_study_acquisitions(acquisitions: list[DicomStudyAcquisition]) -> 'Mod' ]) - for acquisition in acquisitions: + for dicom_series in dicom_series_list: + dicom_files = dicom_series_files[dicom_series] + writer.append_row([ - acquisition.series_number, - acquisition.series_description, - acquisition.sequence_name, - acquisition.echo_time, - acquisition.repetition_time, - acquisition.inversion_time, - acquisition.slice_thickness, - acquisition.phase_encoding, - acquisition.number_of_files, - acquisition.series_uid, - acquisition.modality, + dicom_series.series_number, + dicom_series.series_description, + dicom_series.sequence_name, + dicom_series.echo_time, + dicom_series.repetition_time, + dicom_series.inversion_time, + dicom_series.slice_thickness, + dicom_series.phase_encoding, + len(dicom_files), + dicom_series.series_uid, + dicom_series.modality, ]) return '\n' + writer.write() @@ -149,7 +159,69 @@ def write_dicom_study_ending(dicom_summary: DicomStudySummary) -> str: else: age = '' + dicom_files_count = count(flatten(dicom_summary.dicom_series_files.values())) + other_files_count = len(dicom_summary.other_files) + return '\n' + DictWriter([ - ('Total number of files', len(dicom_summary.dicom_files) + len(dicom_summary.other_files)), + ('Total number of files', dicom_files_count + other_files_count), ('Age at scan', age), ]).write() + + +# Comparison functions used to sort the various DICOM study information objects. + +def compare_dicom_files(a: DicomStudyDicomFile, b: DicomStudyDicomFile): + """ + Compare two DICOM file informations in accordance with `functools.cmp_to_key`. 
+ """ + + return \ + compare_int_none(a.series_number, b.series_number) or \ + compare_int_none(a.file_number, b.file_number) or \ + compare_int_none(a.echo_number, b.echo_number) + + +def compare_dicom_series(a: DicomStudyDicomSeries, b: DicomStudyDicomSeries): + """ + Compare two acquisition informations in accordance with `functools.cmp_to_key`. + """ + + return \ + a.series_number - b.series_number or \ + compare_string_none(a.sequence_name, b.sequence_name) + + +def compare_int_none(a: int | None, b: int | None): + """ + Compare two nullable integers in accordance with `functools.cmp_to_key`. + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b: + return a - b + + +def compare_string_none(a: str | None, b: str | None): + """ + Compare two nullable strings in accordance with `functools.cmp_to_key`. + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b if a < b: + return -1 + case a, b if a > b: + return 1 + case a, b: + return 0 diff --git a/python/lib/util/iter.py b/python/lib/util/iter.py index 94ee18c4d..5b243b4e3 100644 --- a/python/lib/util/iter.py +++ b/python/lib/util/iter.py @@ -1,9 +1,30 @@ -from collections.abc import Callable, Iterable, Iterator +from collections.abc import Callable, Iterable, Iterator, Sized from typing import TypeVar T = TypeVar('T') +def count(iterable: Iterable[T]) -> int: + """ + Count the number of elements in an iterable. + + If the iterable is sized, this function uses the `__len__` method. + If the iterable is an iterator, this function consumes the iterator. 
+ """ + + if isinstance(iterable, Sized): + return len(iterable) + + count = 0 + for _ in iterable: + count += 1 + + return count + + +T = TypeVar('T') # type: ignore + + def find(predicate: Callable[[T], bool], iterable: Iterable[T]) -> T | None: """ Find the first element in an iterable that satisfies a predicate, or return `None` if no match @@ -30,3 +51,15 @@ def filter_map(function: Callable[[T], U | None], iterable: Iterable[T]) -> Iter result = function(item) if result is not None: yield result + + +T = TypeVar('T') # type: ignore + + +def flatten(iterables: Iterable[Iterable[T]]) -> Iterator[T]: + """ + Flatten an iterable of iterables into a single iterator. + """ + + for iterable in iterables: + yield from iterable diff --git a/python/tests/unit/db/query/test_dicom_archive.py b/python/tests/unit/db/query/test_dicom_archive.py index 23dfeb5b2..102790257 100644 --- a/python/tests/unit/db/query/test_dicom_archive.py +++ b/python/tests/unit/db/query/test_dicom_archive.py @@ -7,11 +7,7 @@ from lib.db.models.dicom_archive import DbDicomArchive from lib.db.models.dicom_archive_file import DbDicomArchiveFile from lib.db.models.dicom_archive_series import DbDicomArchiveSeries -from lib.db.queries.dicom_archive import ( - delete_dicom_archive_file_series, - get_dicom_archive_series_with_file_info, - try_get_dicom_archive_with_study_uid, -) +from lib.db.queries.dicom_archive import delete_dicom_archive_file_series, try_get_dicom_archive_with_study_uid from tests.util.database import create_test_database @@ -162,15 +158,3 @@ def test_delete_dicom_archive_file_series(setup: Setup): assert setup.db.execute(select(DbDicomArchiveSeries) .where(DbDicomArchiveSeries.archive_id == setup.dicom_archive.id)).first() is None - - -def test_get_dicom_archive_series_with_file_info(setup: Setup): - dicom_archive_series = get_dicom_archive_series_with_file_info( - setup.db, - '1.3.12.2.11.11.11.999.0.0', - 1, - 100, - 'ep_b100', - ) - - assert dicom_archive_series is 
setup.dicom_archive_series From 222c0482a44214d2d0fd19a1b4a9ae66b4554671 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Tue, 11 Mar 2025 04:23:39 +0000 Subject: [PATCH 12/13] sort dicom series and files before inserting in the database --- python/lib/import_dicom_study/dicom_database.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/lib/import_dicom_study/dicom_database.py b/python/lib/import_dicom_study/dicom_database.py index feaee82b5..db486bab3 100644 --- a/python/lib/import_dicom_study/dicom_database.py +++ b/python/lib/import_dicom_study/dicom_database.py @@ -1,4 +1,5 @@ from datetime import datetime +from functools import cmp_to_key from sqlalchemy.orm import Session as Database @@ -8,7 +9,7 @@ from lib.db.queries.dicom_archive import delete_dicom_archive_file_series from lib.import_dicom_study.import_log import DicomStudyImportLog, write_dicom_study_import_log_to_string from lib.import_dicom_study.summary_type import DicomStudySummary -from lib.import_dicom_study.summary_write import write_dicom_study_summary +from lib.import_dicom_study.summary_write import compare_dicom_files, compare_dicom_series, write_dicom_study_summary from lib.util.iter import count, flatten @@ -101,7 +102,14 @@ def insert_files_series(db: Database, dicom_archive: DbDicomArchive, dicom_summa Insert the DICOM files and series related to a DICOM archive in the database. """ - for dicom_series, dicom_files in dicom_summary.dicom_series_files.items(): + # Sort the DICOM series and files to insert them in the correct order. 
+ dicom_series_list = list(dicom_summary.dicom_series_files.keys()) + dicom_series_list.sort(key=cmp_to_key(compare_dicom_series)) + + for dicom_series in dicom_series_list: + dicom_files = dicom_summary.dicom_series_files[dicom_series] + dicom_files.sort(key=cmp_to_key(compare_dicom_files)) + dicom_series = DbDicomArchiveSeries( archive_id = dicom_archive.id, series_number = dicom_series.series_number, From 31581a3b12970ad68fa5a125549b0da88e97a0f5 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Wed, 12 Mar 2025 02:06:26 +0000 Subject: [PATCH 13/13] handle null scanner --- python/lib/import_dicom_study/dicom_database.py | 8 ++++---- python/lib/import_dicom_study/summary_get.py | 8 ++++---- python/lib/import_dicom_study/summary_type.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/lib/import_dicom_study/dicom_database.py b/python/lib/import_dicom_study/dicom_database.py index db486bab3..90fc9354d 100644 --- a/python/lib/import_dicom_study/dicom_database.py +++ b/python/lib/import_dicom_study/dicom_database.py @@ -85,10 +85,10 @@ def populate_dicom_archive( dicom_archive.tar_type_version = dicom_import_log.archive_version dicom_archive.source_location = dicom_import_log.source_path dicom_archive.archive_location = archive_location - dicom_archive.scanner_manufacturer = dicom_summary.info.scanner.manufacturer - dicom_archive.scanner_model = dicom_summary.info.scanner.model - dicom_archive.scanner_serial_number = dicom_summary.info.scanner.serial_number - dicom_archive.scanner_software_version = dicom_summary.info.scanner.software_version + dicom_archive.scanner_manufacturer = dicom_summary.info.scanner.manufacturer or '' + dicom_archive.scanner_model = dicom_summary.info.scanner.model or '' + dicom_archive.scanner_serial_number = dicom_summary.info.scanner.serial_number or '' + dicom_archive.scanner_software_version = dicom_summary.info.scanner.software_version or '' dicom_archive.session_id = None dicom_archive.upload_attempt 
= 0 dicom_archive.create_info = write_dicom_study_import_log_to_string(dicom_import_log) diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py index 24352f34b..b5f665dfc 100644 --- a/python/lib/import_dicom_study/summary_get.py +++ b/python/lib/import_dicom_study/summary_get.py @@ -78,10 +78,10 @@ def get_dicom_study_info(dicom: pydicom.Dataset) -> DicomStudyInfo: ) scanner = DicomStudyScanner( - read_value(dicom, 'Manufacturer'), - read_value(dicom, 'ManufacturerModelName'), - read_value(dicom, 'DeviceSerialNumber'), - read_value(dicom, 'SoftwareVersions'), + read_value_none(dicom, 'Manufacturer'), + read_value_none(dicom, 'ManufacturerModelName'), + read_value_none(dicom, 'DeviceSerialNumber'), + read_value_none(dicom, 'SoftwareVersions'), ) return DicomStudyInfo( diff --git a/python/lib/import_dicom_study/summary_type.py b/python/lib/import_dicom_study/summary_type.py index e54600767..f5d2c2216 100644 --- a/python/lib/import_dicom_study/summary_type.py +++ b/python/lib/import_dicom_study/summary_type.py @@ -20,10 +20,10 @@ class DicomStudyScanner: Information about a DICOM study scanner. """ - manufacturer: str - model: str - serial_number: str - software_version: str + manufacturer: str | None + model: str | None + serial_number: str | None + software_version: str | None @dataclass