diff --git a/install/requirements/requirements.txt b/install/requirements/requirements.txt index 5f84aaaaa..96a615e7f 100644 --- a/install/requirements/requirements.txt +++ b/install/requirements/requirements.txt @@ -11,6 +11,7 @@ nose numpy protobuf>=3.0.0 pybids==0.17.0 +pydicom pyright pytest python-dateutil diff --git a/pyproject.toml b/pyproject.toml index eb0285357..cfbc1e2a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ include = [ "python/tests", "python/lib/db", "python/lib/exception", + "python/lib/import_dicom_study", "python/lib/util", "python/lib/config_file.py", "python/lib/env.py", @@ -28,6 +29,8 @@ include = [ "python/lib/make_env.py", "python/lib/scanner.py", "python/lib/validate_subject_info.py", + "python/scripts/import_dicom_study.py", + "python/scripts/summarize_dicom_study.py", ] typeCheckingMode = "strict" reportMissingTypeStubs = "none" diff --git a/python/lib/db/queries/dicom_archive.py b/python/lib/db/queries/dicom_archive.py index 18ddc29cb..f58b6fdb6 100644 --- a/python/lib/db/queries/dicom_archive.py +++ b/python/lib/db/queries/dicom_archive.py @@ -52,27 +52,6 @@ def delete_dicom_archive_file_series(db: Database, dicom_archive: DbDicomArchive .where(DbDicomArchiveSeries.archive_id == dicom_archive.id)) -def get_dicom_archive_series_with_file_info( - db: Database, - series_uid: str, - series_number: int, - echo_time: float | None, - sequence_name: str | None, -): - """ - Get a DICOM archive series from the database using its file information, or raise an exception - if no DICOM archive series is found. 
- """ - - query = select(DbDicomArchiveSeries) \ - .where(DbDicomArchiveSeries.series_uid == series_uid) \ - .where(DbDicomArchiveSeries.series_number == series_number) \ - .where(DbDicomArchiveSeries.echo_time == echo_time) \ - .where(DbDicomArchiveSeries.sequence_name == sequence_name) - - return db.execute(query).scalar_one() - - def try_get_dicom_archive_series_with_series_uid_echo_time( db: Database, series_uid: str, diff --git a/python/lib/import_dicom_study/dicom_database.py b/python/lib/import_dicom_study/dicom_database.py new file mode 100644 index 000000000..90fc9354d --- /dev/null +++ b/python/lib/import_dicom_study/dicom_database.py @@ -0,0 +1,144 @@ +from datetime import datetime +from functools import cmp_to_key + +from sqlalchemy.orm import Session as Database + +from lib.db.models.dicom_archive import DbDicomArchive +from lib.db.models.dicom_archive_file import DbDicomArchiveFile +from lib.db.models.dicom_archive_series import DbDicomArchiveSeries +from lib.db.queries.dicom_archive import delete_dicom_archive_file_series +from lib.import_dicom_study.import_log import DicomStudyImportLog, write_dicom_study_import_log_to_string +from lib.import_dicom_study.summary_type import DicomStudySummary +from lib.import_dicom_study.summary_write import compare_dicom_files, compare_dicom_series, write_dicom_study_summary +from lib.util.iter import count, flatten + + +def insert_dicom_archive( + db: Database, + dicom_summary: DicomStudySummary, + dicom_import_log: DicomStudyImportLog, + archive_location: str, +): + """ + Insert a DICOM archive in the database. 
+ """ + + dicom_archive = DbDicomArchive() + populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_location) + dicom_archive.date_first_archived = datetime.now() + db.add(dicom_archive) + db.commit() + insert_files_series(db, dicom_archive, dicom_summary) + return dicom_archive + + +def update_dicom_archive( + db: Database, + dicom_archive: DbDicomArchive, + dicom_summary: DicomStudySummary, + dicom_import_log: DicomStudyImportLog, + archive_location: str, +): + """ + Update a DICOM archive in the database. + """ + + # Delete the associated database DICOM files and series. + delete_dicom_archive_file_series(db, dicom_archive) + + # Update the database record with the new DICOM information. + populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_location) + db.commit() + + # Insert the new DICOM files and series. + insert_files_series(db, dicom_archive, dicom_summary) + + +def populate_dicom_archive( + dicom_archive: DbDicomArchive, + dicom_summary: DicomStudySummary, + dicom_import_log: DicomStudyImportLog, + archive_location: str, +): + """ + Populate a DICOM archive database object with information from its DICOM summary and DICOM + study import log. 
+ """ + + dicom_archive.study_uid = dicom_summary.info.study_uid + dicom_archive.patient_id = dicom_summary.info.patient.id + dicom_archive.patient_name = dicom_summary.info.patient.name + dicom_archive.patient_birthdate = dicom_summary.info.patient.birth_date + dicom_archive.patient_sex = dicom_summary.info.patient.sex + dicom_archive.neuro_db_center_name = None + dicom_archive.center_name = dicom_summary.info.institution or '' + dicom_archive.last_update = None + dicom_archive.date_acquired = dicom_summary.info.scan_date + dicom_archive.date_last_archived = datetime.now() + dicom_archive.acquisition_count = len(dicom_summary.dicom_series_files) + dicom_archive.dicom_file_count = count(flatten(dicom_summary.dicom_series_files.values())) + dicom_archive.non_dicom_file_count = len(dicom_summary.other_files) + dicom_archive.md5_sum_dicom_only = dicom_import_log.tarball_md5_sum + dicom_archive.md5_sum_archive = dicom_import_log.archive_md5_sum + dicom_archive.creating_user = dicom_import_log.creator_name + dicom_archive.sum_type_version = dicom_import_log.summary_version + dicom_archive.tar_type_version = dicom_import_log.archive_version + dicom_archive.source_location = dicom_import_log.source_path + dicom_archive.archive_location = archive_location + dicom_archive.scanner_manufacturer = dicom_summary.info.scanner.manufacturer or '' + dicom_archive.scanner_model = dicom_summary.info.scanner.model or '' + dicom_archive.scanner_serial_number = dicom_summary.info.scanner.serial_number or '' + dicom_archive.scanner_software_version = dicom_summary.info.scanner.software_version or '' + dicom_archive.session_id = None + dicom_archive.upload_attempt = 0 + dicom_archive.create_info = write_dicom_study_import_log_to_string(dicom_import_log) + dicom_archive.acquisition_metadata = write_dicom_study_summary(dicom_summary) + dicom_archive.date_sent = None + dicom_archive.pending_transfer = False + + +def insert_files_series(db: Database, dicom_archive: DbDicomArchive, 
dicom_summary: DicomStudySummary): + """ + Insert the DICOM files and series related to a DICOM archive in the database. + """ + + # Sort the DICOM series and files to insert them in the correct order. + dicom_series_list = list(dicom_summary.dicom_series_files.keys()) + dicom_series_list.sort(key=cmp_to_key(compare_dicom_series)) + + for dicom_series in dicom_series_list: + dicom_files = dicom_summary.dicom_series_files[dicom_series] + dicom_files.sort(key=cmp_to_key(compare_dicom_files)) + + dicom_series = DbDicomArchiveSeries( + archive_id = dicom_archive.id, + series_number = dicom_series.series_number, + series_description = dicom_series.series_description, + sequence_name = dicom_series.sequence_name, + echo_time = dicom_series.echo_time, + repetition_time = dicom_series.repetition_time, + inversion_time = dicom_series.inversion_time, + slice_thickness = dicom_series.slice_thickness, + phase_encoding = dicom_series.phase_encoding, + number_of_files = len(dicom_files), + series_uid = dicom_series.series_uid, + modality = dicom_series.modality, + ) + + # Populate the DICOM series ID. 
+ db.add(dicom_series) + db.commit() + + for dicom_file in dicom_files: + db.add(DbDicomArchiveFile( + archive_id = dicom_archive.id, + series_number = dicom_file.series_number, + file_number = dicom_file.file_number, + echo_number = dicom_file.echo_number, + series_description = dicom_file.series_description, + md5_sum = dicom_file.md5_sum, + file_name = dicom_file.file_name, + series_id = dicom_series.id, + )) + + db.commit() diff --git a/python/lib/import_dicom_study/import_log.py b/python/lib/import_dicom_study/import_log.py new file mode 100644 index 000000000..2b88972cd --- /dev/null +++ b/python/lib/import_dicom_study/import_log.py @@ -0,0 +1,76 @@ +import os +import socket +from dataclasses import dataclass +from datetime import datetime + +from lib.import_dicom_study.text_dict import DictWriter + + +@dataclass +class DicomStudyImportLog: + """ + Information about the past import of a DICOM study. + """ + + source_path: str + target_path: str + creator_host: str + creator_os: str + creator_name: str + archive_date: str + summary_version: int + archive_version: int + tarball_md5_sum: str + zipball_md5_sum: str + archive_md5_sum: str + + +def write_dicom_study_import_log_to_string(import_log: DicomStudyImportLog): + """ + Serialize a DICOM study import log into a string. 
+ """ + + return DictWriter([ + ("Taken from dir", import_log.source_path), + ("Archive target location", import_log.target_path), + ("Name of creating host", import_log.creator_host), + ("Name of host OS", import_log.creator_os), + ("Created by user", import_log.creator_name), + ("Archived on", import_log.archive_date), + ("dicomSummary version", import_log.summary_version), + ("dicomTar version", import_log.archive_version), + ("md5sum for DICOM tarball", import_log.tarball_md5_sum), + ("md5sum for DICOM tarball gzipped", import_log.zipball_md5_sum), + ("md5sum for complete archive", import_log.archive_md5_sum), + ]).write() + + +def write_dicom_study_import_log_to_file(import_log: DicomStudyImportLog, file_path: str): + """ + Serialize a DICOM study import log into a text file. + """ + + string = write_dicom_study_import_log_to_string(import_log) + with open(file_path, "w") as file: + file.write(string) + + +def make_dicom_study_import_log(source: str, target: str, tarball_md5_sum: str, zipball_md5_sum: str): + """ + Create a DICOM study import log from the provided arguments about a DICOM study, as well as the + current execution environment. 
+ """ + + return DicomStudyImportLog( + source, + target, + socket.gethostname(), + os.uname().sysname, + os.environ["USER"], + datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S"), + 2, + 2, + tarball_md5_sum, + zipball_md5_sum, + "Provided in database only", + ) diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py new file mode 100644 index 000000000..b5f665dfc --- /dev/null +++ b/python/lib/import_dicom_study/summary_get.py @@ -0,0 +1,172 @@ +import os + +import pydicom +import pydicom.errors + +from lib.import_dicom_study.summary_type import ( + DicomStudyDicomFile, + DicomStudyDicomSeries, + DicomStudyInfo, + DicomStudyOtherFile, + DicomStudyPatient, + DicomStudyScanner, + DicomStudySummary, +) +from lib.import_dicom_study.text import read_dicom_date_none +from lib.util.crypto import compute_file_md5_hash +from lib.util.fs import iter_all_dir_files + + +def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): + """ + Get information about a DICOM study by reading the files in the DICOM study directory. 
+ """ + + study_info = None + dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]] = {} + other_files: list[DicomStudyOtherFile] = [] + + file_rel_paths = list(iter_all_dir_files(dicom_study_dir_path)) + for i, file_rel_path in enumerate(file_rel_paths, start=1): + if verbose: + print(f"Processing file '{file_rel_path}' ({i}/{len(file_rel_paths)})") + + file_path = os.path.join(dicom_study_dir_path, file_rel_path) + + try: + dicom = pydicom.dcmread(file_path) # type: ignore + if study_info is None: + study_info = get_dicom_study_info(dicom) + + modality = read_value_none(dicom, 'Modality') + if modality is None: + print(f"Found no modality for DICOM file '{file_rel_path}'.") + raise pydicom.errors.InvalidDicomError + + if modality != 'MR' and modality != 'PT': + print(f"Found unhandled modality '{modality}' for DICOM file '{file_rel_path}'.") + raise pydicom.errors.InvalidDicomError + + dicom_series = get_dicom_series_info(dicom) + if dicom_series not in dicom_series_files: + dicom_series_files[dicom_series] = [] + + dicom_file = get_dicom_file_info(dicom) + dicom_series_files[dicom_series].append(dicom_file) + except pydicom.errors.InvalidDicomError: + other_files.append(get_other_file_info(file_path)) + + if study_info is None: + raise Exception("Found no DICOM file in the DICOM study directory.") + + return DicomStudySummary(study_info, dicom_series_files, other_files) + + +def get_dicom_study_info(dicom: pydicom.Dataset) -> DicomStudyInfo: + """ + Get general information about a DICOM study from one of its DICOM files. 
+ """ + + birth_date = read_dicom_date_none(read_value_none(dicom, 'PatientBirthDate')) + scan_date = read_dicom_date_none(read_value_none(dicom, 'StudyDate')) + + patient = DicomStudyPatient( + read_value(dicom, 'PatientID'), + read_value(dicom, 'PatientName'), + read_value_none(dicom, 'PatientSex'), + birth_date, + ) + + scanner = DicomStudyScanner( + read_value_none(dicom, 'Manufacturer'), + read_value_none(dicom, 'ManufacturerModelName'), + read_value_none(dicom, 'DeviceSerialNumber'), + read_value_none(dicom, 'SoftwareVersions'), + ) + + return DicomStudyInfo( + read_value(dicom, 'StudyInstanceUID'), + patient, + scanner, + scan_date, + read_value_none(dicom, 'InstitutionName'), + read_value(dicom, 'Modality'), + ) + + +def get_dicom_file_info(dicom: pydicom.Dataset) -> DicomStudyDicomFile: + """ + Get information about a DICOM file within a DICOM study. + """ + + return DicomStudyDicomFile( + os.path.basename(dicom.filename), + compute_file_md5_hash(dicom.filename), + read_value_none(dicom, 'SeriesNumber'), + read_value_none(dicom, 'SeriesInstanceUID'), + read_value_none(dicom, 'SeriesDescription'), + read_value_none(dicom, 'InstanceNumber'), + read_value_none(dicom, 'EchoNumbers'), + read_value_none(dicom, 'EchoTime'), + read_value_none(dicom, 'SequenceName'), + ) + + +def get_other_file_info(file_path: str) -> DicomStudyOtherFile: + """ + Get information about a non-DICOM file within a DICOM study. + """ + + return DicomStudyOtherFile( + os.path.basename(file_path), + compute_file_md5_hash(file_path), + ) + + +def get_dicom_series_info(dicom: pydicom.Dataset): + """ + Get information about a DICOM series within a DICOM study. 
+ """ + + return DicomStudyDicomSeries( + read_value(dicom, 'SeriesNumber'), + read_value_none(dicom, 'SeriesInstanceUID'), + read_value_none(dicom, 'SeriesDescription'), + read_value_none(dicom, 'SequenceName'), + read_value_none(dicom, 'EchoTime'), + read_value_none(dicom, 'RepetitionTime'), + read_value_none(dicom, 'InversionTime'), + read_value_none(dicom, 'SliceThickness'), + read_value_none(dicom, 'InPlanePhaseEncodingDirection'), + read_value_none(dicom, 'Modality'), + ) + + +# Read DICOM attributes. + +def read_value(dicom: pydicom.Dataset, tag: str): + """ + Read a DICOM attribute from a DICOM using a given tag, or raise an exception if there is no + attribute with that tag in the DICOM. + """ + + if tag not in dicom: + raise Exception(f"Expected DICOM tag '{tag}' but found none.") + + return dicom[tag].value + + +def read_value_none(dicom: pydicom.Dataset, tag: str): + """ + Read a DICOM attribute from a DICOM using a given tag, or return `None` if there is no + attribute with that tag in the DICOM. + """ + + if tag not in dicom: + for elem in dicom.iterall(): + # to find header information in enhanced DICOMs, need to look into subheaders + if elem.tag == tag: + return elem.value + return None + + return dicom[tag].value or None diff --git a/python/lib/import_dicom_study/summary_type.py b/python/lib/import_dicom_study/summary_type.py new file mode 100644 index 000000000..f5d2c2216 --- /dev/null +++ b/python/lib/import_dicom_study/summary_type.py @@ -0,0 +1,105 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class DicomStudyPatient: + """ + Information about a DICOM study patient. + """ + + id: str + name: str + sex: str | None + birth_date: date | None + + +@dataclass +class DicomStudyScanner: + """ + Information about a DICOM study scanner. 
+ """ + + manufacturer: str | None + model: str | None + serial_number: str | None + software_version: str | None + + +@dataclass +class DicomStudyInfo: + """ + General information about a DICOM study. + """ + + study_uid: str + patient: DicomStudyPatient + scanner: DicomStudyScanner + scan_date: date | None + institution: str | None + modality: str + + +@dataclass +class DicomStudyDicomFile: + """ + Information about a DICOM file within a DICOM sutdy. + """ + + file_name: str + md5_sum: str + series_number: int | None + series_uid: str | None + series_description: str | None + file_number: int | None + echo_number: int | None + echo_time: float | None + sequence_name: str | None + + +@dataclass +class DicomStudyOtherFile: + """ + Information about a non-DICOM file within a DICOM study. + """ + + file_name: str + md5_sum: str + + +# This dataclass does not correspond to a "real" DICOM series, as a DICOM series may actually have +# files that have different echo times, inversion times, repetition times... (for instance in +# multi-echo series). +# Generally, a "real" DICOM series should be uniquely identifiable by using the series instance UID +# DICOM attribute. +# This class corresponds more to a LORIS database DICOM series entry, which is a unique tuple of +# some parameters of the DICOM files of a study (including the DICOM series instance UID). As such, +# there is a 1-to-n relationship between a "real" DICOM series, and the LORIS database DICOM series +# entries. +@dataclass(frozen=True) +class DicomStudyDicomSeries: + """ + Information about an DICOM series within a DICOM study. 
+ """ + + series_number: int + series_uid: str | None + series_description: str | None + sequence_name: str | None + echo_time: float | None # In Milliseconds + repetition_time: float | None # In Milliseconds + inversion_time: float | None # In Milliseconds + slice_thickness: float | None # In Millimeters + phase_encoding: str | None + modality: str | None + + +@dataclass +class DicomStudySummary: + """ + Information about a DICOM study and its files. + """ + + info: DicomStudyInfo + dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]] + other_files: list[DicomStudyOtherFile] diff --git a/python/lib/import_dicom_study/summary_write.py b/python/lib/import_dicom_study/summary_write.py new file mode 100644 index 000000000..aad415ebd --- /dev/null +++ b/python/lib/import_dicom_study/summary_write.py @@ -0,0 +1,227 @@ +import xml.etree.ElementTree as ET +from functools import cmp_to_key + +from lib.import_dicom_study.summary_type import ( + DicomStudyDicomFile, + DicomStudyDicomSeries, + DicomStudyInfo, + DicomStudyOtherFile, + DicomStudySummary, +) +from lib.import_dicom_study.text import write_date_none +from lib.import_dicom_study.text_dict import DictWriter +from lib.import_dicom_study.text_table import TableWriter +from lib.util.iter import count, flatten + + +def write_dicom_study_summary_to_file(dicom_summary: DicomStudySummary, filename: str): + """ + Serialize a DICOM study summary object into a text file. + """ + + string = write_dicom_study_summary(dicom_summary) + with open(filename, 'w') as file: + file.write(string) + + +def write_dicom_study_summary(dicom_summary: DicomStudySummary) -> str: + """ + Serialize a DICOM study summary object into a string. 
+ """ + + xml = ET.Element('STUDY') + ET.SubElement(xml, 'STUDY_INFO').text = write_dicom_study_info(dicom_summary.info) + ET.SubElement(xml, 'FILES').text = write_dicom_study_dicom_files(dicom_summary.dicom_series_files) + ET.SubElement(xml, 'OTHERS').text = write_dicom_study_other_files(dicom_summary.other_files) + ET.SubElement(xml, 'ACQUISITIONS').text = write_dicom_study_dicom_series(dicom_summary.dicom_series_files) + ET.SubElement(xml, 'SUMMARY').text = write_dicom_study_ending(dicom_summary) + ET.indent(xml, space='') + return ET.tostring(xml, encoding='unicode') + '\n' + + +def write_dicom_study_info(info: DicomStudyInfo) -> str: + """ + Serialize general information about a DICOM study. + """ + + return '\n' + DictWriter([ + ('Unique Study ID' , info.study_uid), + ('Patient Name' , info.patient.name), + ('Patient ID' , info.patient.id), + ('Patient date of birth' , write_date_none(info.patient.birth_date)), + ('Patient Sex' , info.patient.sex), + ('Scan Date' , write_date_none(info.scan_date)), + ('Scanner Manufacturer' , info.scanner.manufacturer), + ('Scanner Model Name' , info.scanner.model), + ('Scanner Serial Number' , info.scanner.serial_number), + ('Scanner Software Version' , info.scanner.software_version), + ('Institution Name' , info.institution), + ('Modality' , info.modality), + ]).write() + + +def write_dicom_study_dicom_files(dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]]) -> str: + """ + Serialize information about the DICOM files of a DICOM study into a table. 
+ """ + + dicom_files = list(flatten(dicom_series_files.values())) + dicom_files.sort(key=cmp_to_key(compare_dicom_files)) + + writer = TableWriter() + writer.append_row(['SN', 'FN', 'EN', 'Series', 'md5sum', 'File name']) + for dicom_file in dicom_files: + writer.append_row([ + dicom_file.series_number, + dicom_file.file_number, + dicom_file.echo_number, + dicom_file.series_description, + dicom_file.md5_sum, + dicom_file.file_name, + ]) + + return '\n' + writer.write() + + +def write_dicom_study_other_files(other_files: list[DicomStudyOtherFile]) -> str: + """ + Serialize information about the non-DICOM files of a DICOM study into a table. + """ + + writer = TableWriter() + writer.append_row(['md5sum', 'File name']) + for other_file in other_files: + writer.append_row([ + other_file.md5_sum, + other_file.file_name, + ]) + + return '\n' + writer.write() + + +def write_dicom_study_dicom_series(dicom_series_files: dict[DicomStudyDicomSeries, list[DicomStudyDicomFile]]) -> str: + """ + Serialize information about the DICOM series of a DICOM study into a table. 
+ """ + + dicom_series_list = list(dicom_series_files.keys()) + dicom_series_list.sort(key=cmp_to_key(compare_dicom_series)) + + writer = TableWriter() + writer.append_row([ + 'Series (SN)', + 'Name of series', + 'Seq Name', + 'echoT ms', + 'repT ms', + 'invT ms', + 'sth mm', + 'PhEnc', + 'NoF', + 'Series UID', + 'Mod' + ]) + + for dicom_series in dicom_series_list: + dicom_files = dicom_series_files[dicom_series] + + writer.append_row([ + dicom_series.series_number, + dicom_series.series_description, + dicom_series.sequence_name, + dicom_series.echo_time, + dicom_series.repetition_time, + dicom_series.inversion_time, + dicom_series.slice_thickness, + dicom_series.phase_encoding, + len(dicom_files), + dicom_series.series_uid, + dicom_series.modality, + ]) + + return '\n' + writer.write() + + +def write_dicom_study_ending(dicom_summary: DicomStudySummary) -> str: + """ + Serialize some additional information about a DICOM study. + """ + + birth_date = dicom_summary.info.patient.birth_date + scan_date = dicom_summary.info.scan_date + + if birth_date and scan_date: + years = scan_date.year - birth_date.year + months = scan_date.month - birth_date.month + days = scan_date.day - birth_date.day + total = round(years + months / 12 + days / 365.0, 2) + age = f'{total} or {years} years, {months} months {days} days' + else: + age = '' + + dicom_files_count = count(flatten(dicom_summary.dicom_series_files.values())) + other_files_count = len(dicom_summary.other_files) + + return '\n' + DictWriter([ + ('Total number of files', dicom_files_count + other_files_count), + ('Age at scan', age), + ]).write() + + +# Comparison functions used to sort the various DICOM study information objects. + +def compare_dicom_files(a: DicomStudyDicomFile, b: DicomStudyDicomFile): + """ + Compare two DICOM file informations in accordance with `functools.cmp_to_key`. 
+ """ + + return \ + compare_int_none(a.series_number, b.series_number) or \ + compare_int_none(a.file_number, b.file_number) or \ + compare_int_none(a.echo_number, b.echo_number) + + +def compare_dicom_series(a: DicomStudyDicomSeries, b: DicomStudyDicomSeries): + """ + Compare two acquisition informations in accordance with `functools.cmp_to_key`. + """ + + return \ + a.series_number - b.series_number or \ + compare_string_none(a.sequence_name, b.sequence_name) + + +def compare_int_none(a: int | None, b: int | None): + """ + Compare two nullable integers in accordance with `functools.cmp_to_key`. + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b: + return a - b + + +def compare_string_none(a: str | None, b: str | None): + """ + Compare two nullable strings in accordance with `functools.cmp_to_key`. + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b if a < b: + return -1 + case a, b if a > b: + return 1 + case a, b: + return 0 diff --git a/python/lib/import_dicom_study/text.py b/python/lib/import_dicom_study/text.py new file mode 100644 index 000000000..b5f3004fe --- /dev/null +++ b/python/lib/import_dicom_study/text.py @@ -0,0 +1,74 @@ +""" +A bunch of functions to convert values between (possibly empty) strings and +different types of values. 
+""" + +import os +from datetime import date, datetime + +from lib.util.crypto import compute_file_md5_hash + + +def write_value(value: str | int | float | None): + if value is None: + return '' + + return str(value) + + +def write_datetime(datetime: datetime): + return datetime.strftime('%Y-%m-%d %H:%M:%S') + + +def write_date(date: date): + return date.strftime('%Y-%m-%d') + + +def write_date_none(date: date | None): + if date is None: + return None + + return write_date(date) + + +def read_none(string: str): + if string == '': + return None + + return string + + +def read_date_none(string: str | None): + if string is None: + return None + + return datetime.strptime(string, '%Y-%m-%d').date() + + +def read_dicom_date_none(string: str | None): + if string is None: + return None + + return datetime.strptime(string, '%Y%m%d').date() + + +def read_int_none(string: str | None): + if string is None: + return None + + return int(string) + + +def read_float_none(string: str | None): + if string is None: + return None + + return float(string) + + +def compute_md5_hash_with_name(path: str): + """ + Get the MD5 sum hash of a file with the filename appended. + """ + + return f'{compute_file_md5_hash(path)} {os.path.basename(path)}' diff --git a/python/lib/import_dicom_study/text_dict.py b/python/lib/import_dicom_study/text_dict.py new file mode 100644 index 000000000..91ea94088 --- /dev/null +++ b/python/lib/import_dicom_study/text_dict.py @@ -0,0 +1,44 @@ +from lib.import_dicom_study.text import write_value + + +class DictWriter: + """ + Writer for a text dictionary, that is, a text of the form: + + Key 1 : Value 1 + Key 2 : Value 2 + ... + """ + + def __init__(self, entries: list[tuple[str, str | int | float | None]]): + self.entries = entries + + def get_keys_length(self): + """ + Get the maximal length of the keys, used for padding. 
+ """ + + length = 0 + for entry in self.entries: + key = entry[0] + if len(key) > length: + length = len(key) + + return length + + def write(self): + """ + Serialize the text dictionary into a string. + """ + + if not self.entries: + return '\n' + + length = self.get_keys_length() + + entries = map( + lambda entry: f'* {entry[0].ljust(length)} : {write_value(entry[1])}\n', + self.entries, + ) + + return ''.join(entries) diff --git a/python/lib/import_dicom_study/text_table.py b/python/lib/import_dicom_study/text_table.py new file mode 100644 index 000000000..7ffbface4 --- /dev/null +++ b/python/lib/import_dicom_study/text_table.py @@ -0,0 +1,52 @@ +from lib.import_dicom_study.text import write_value + + +class TableWriter: + """ + Writer for a text table, that is, a table of the form: + + Field 1 | Field 2 | Field 3 + Value 1 | Value 2 | Value 3 + Value 4 | Value 5 | Value 6 + ... + """ + + rows: list[list[str]] + + def __init__(self): + self.rows = [] + + def get_cells_lengths(self): + """ + Get the longest value length of each column, used for padding. + """ + + lengths = [0] * len(self.rows[0]) + for row in self.rows: + for i in range(len(row)): + if len(row[i]) > lengths[i]: + lengths[i] = len(row[i]) + + return lengths + + def append_row(self, cells: list[str | int | float | None]): + """ + Add a row to the table, which can be either the header or some values. + """ + + self.rows.append(list(map(write_value, cells))) + + def write(self): + """ + Serialize the text table into a string. 
+ """ + + if not self.rows: + return '\n' + + lengths = self.get_cells_lengths() + + rows = map(lambda row: list(map(lambda cell, length: cell.ljust(length), row, lengths)), self.rows) + rows = map(lambda row: ' | '.join(row).rstrip() + '\n', rows) + + return ''.join(rows) diff --git a/python/lib/util/iter.py b/python/lib/util/iter.py index 94ee18c4d..5b243b4e3 100644 --- a/python/lib/util/iter.py +++ b/python/lib/util/iter.py @@ -1,9 +1,30 @@ -from collections.abc import Callable, Iterable, Iterator +from collections.abc import Callable, Iterable, Iterator, Sized from typing import TypeVar T = TypeVar('T') +def count(iterable: Iterable[T]) -> int: + """ + Count the number of elements in an iterable. + + If the iterable is sized, this function uses the `__len__` method. + If the iterable is an iterator, this function consumes the iterator. + """ + + if isinstance(iterable, Sized): + return len(iterable) + + count = 0 + for _ in iterable: + count += 1 + + return count + + +T = TypeVar('T') # type: ignore + + def find(predicate: Callable[[T], bool], iterable: Iterable[T]) -> T | None: """ Find the first element in an iterable that satisfies a predicate, or return `None` if no match @@ -30,3 +51,15 @@ def filter_map(function: Callable[[T], U | None], iterable: Iterable[T]) -> Iter result = function(item) if result is not None: yield result + + +T = TypeVar('T') # type: ignore + + +def flatten(iterables: Iterable[Iterable[T]]) -> Iterator[T]: + """ + Flatten an iterable of iterables into a single iterator. 
+ """ + + for iterable in iterables: + yield from iterable diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py new file mode 100755 index 000000000..492ab56f7 --- /dev/null +++ b/python/scripts/import_dicom_study.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python + +import gzip +import os +import shutil +import tarfile +import tempfile +from typing import Any, cast + +import lib.database +import lib.exitcode +import lib.import_dicom_study.text +from lib.db.models.dicom_archive import DbDicomArchive +from lib.db.queries.config import get_config_with_setting_name +from lib.db.queries.dicom_archive import try_get_dicom_archive_with_study_uid +from lib.get_subject_session import get_subject_session +from lib.import_dicom_study.dicom_database import insert_dicom_archive, update_dicom_archive +from lib.import_dicom_study.import_log import ( + make_dicom_study_import_log, + write_dicom_study_import_log_to_file, + write_dicom_study_import_log_to_string, +) +from lib.import_dicom_study.summary_get import get_dicom_study_summary +from lib.import_dicom_study.summary_write import write_dicom_study_summary_to_file +from lib.logging import log, log_error_exit, log_warning +from lib.lorisgetopt import LorisGetOpt +from lib.make_env import make_env +from lib.util.fs import iter_all_dir_files +from lib.validate_subject_info import validate_subject_info + + +class Args: + profile: str + source: str + insert: bool + update: bool + session: bool + overwrite: bool + verbose: bool + + def __init__(self, options_dict: dict[str, Any]): + self.profile = options_dict['profile']['value'] + self.source = os.path.normpath(options_dict['source']['value']) + self.overwrite = options_dict['overwrite']['value'] + self.insert = options_dict['insert']['value'] + self.update = options_dict['update']['value'] + self.session = options_dict['session']['value'] + self.verbose = options_dict['verbose']['value'] + + +def main() -> None: + usage = ( + "\n" + 
"********************************************************************\n"
+        " DICOM STUDY IMPORT SCRIPT\n"
+        "********************************************************************\n"
+        "This script reads a directory containing the DICOM files of a study, processes the\n"
+        "directory into a structured and compressed archive, and inserts or uploads the study\n"
+        "into the LORIS database.\n"
+        "\n"
+        "Usage: import_dicom_study.py -p -s ...\n"
+        "\n"
+        "Options: \n"
+        "\t-p, --profile : Name of the LORIS Python configuration file (usually\n"
+        "\t 'database_config.py')\n"
+        "\t-s, --source : Path of the source directory containing the DICOM files of the\n"
+        "\t study.\n"
+        "\t --overwrite : Overwrite the DICOM archive file if it already exists.\n"
+        "\t --insert : Insert the created DICOM archive in the database (requires the archive\n"
+        "\t to not be already inserted).\n"
+        "\t --update : Update the DICOM archive in the database (requires the archive to be\n"
+        "\t already inserted), generally used with '--overwrite'.\n"
+        "\t --session : Associate the DICOM study with an existing session using the LORIS-MRI\n"
+        "\t Python configuration.\n"
+        "\t-v, --verbose : If set, be verbose\n"
+        "\n"
+        "Required options: \n"
+        "\t--profile\n"
+        "\t--source\n"
+        "\t--target\n"
+    )
+
+    # NOTE: Some options do not have short options but LorisGetOpt does not support that, so we
+    # repeat the long names. 
+ options_dict = { + "profile": { + "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False + }, + "source": { + "value": None, "required": True, "expect_arg": True, "short_opt": "s", "is_path": True, + }, + "overwrite": { + "value": False, "required": False, "expect_arg": False, "short_opt": "overwrite", "is_path": False, + }, + "insert": { + "value": False, "required": False, "expect_arg": False, "short_opt": "insert", "is_path": False, + }, + "update": { + "value": False, "required": False, "expect_arg": False, "short_opt": "update", "is_path": False, + }, + "session": { + "value": False, "required": False, "expect_arg": False, "short_opt": "session", "is_path": False, + }, + "verbose": { + "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False + }, + "help": { + "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False + }, + } + + # Get the CLI arguments and connect to the database. + + loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + env = make_env(loris_getopt_obj) + args = Args(loris_getopt_obj.options_dict) + + # Check arguments. + + if not os.path.isdir(args.source) or not os.access(args.source, os.R_OK): + log_error_exit( + env, + "Argument '--source' must be a readable directory path.", + lib.exitcode.INVALID_ARG, + ) + + if args.insert and args.update: + log_error_exit( + env, + "Arguments '--insert' and '--update' cannot be used both at the same time.", + lib.exitcode.INVALID_ARG, + ) + + if args.session and not (args.insert or args.update): + log_error_exit( + env, + "Argument '--insert' or '--update' must be used when '--session' is used.", + lib.exitcode.INVALID_ARG, + ) + + # Utility variables. + + dicom_study_name = os.path.basename(args.source) + + log(env, "Extracting DICOM information... 
(may take a long time)")
+
+    dicom_summary = get_dicom_study_summary(args.source, args.verbose)
+
+    log(env, "Checking if the DICOM study is already inserted in LORIS...")
+
+    dicom_archive = try_get_dicom_archive_with_study_uid(env.db, dicom_summary.info.study_uid)
+
+    if dicom_archive is not None:
+        log(env, "Found the DICOM study in LORIS.")
+
+        if args.insert:
+            log_error_exit(
+                env,
+                (
+                    "Cannot insert the DICOM study since it is already inserted in LORIS. Use"
+                    " arguments '--update' and '--overwrite' to update the currently inserted DICOM"
+                    " study.\n"
+                    f"Inserted DICOM study import log:\n{dicom_archive.create_info}"
+                ),
+                lib.exitcode.INSERT_FAILURE,
+            )
+
+    if dicom_archive is None:
+        log(env, "Did not find the DICOM study in LORIS.")
+
+        if args.update:
+            log_error_exit(
+                env,
+                (
+                    "Cannot update the DICOM study since it is not already inserted in LORIS. Use"
+                    " argument '--insert' to insert the DICOM study in LORIS."
+                ),
+                lib.exitcode.UPDATE_FAILURE,
+            )
+
+    session = None
+    if args.session:
+        # TODO: Factorize this code into a streamlined way to get the session from the configuration.
+        subject_info = loris_getopt_obj.config_info.get_subject_info( # type: ignore
+            loris_getopt_obj.db,
+            str(dicom_summary.info.patient.name)
+        )
+
+        validate_subject_info(env.db, subject_info)
+        session = get_subject_session(env, subject_info)
+
+    log(env, 'Checking DICOM scan date...')
+
+    # TODO: Factorize this into a `lib.config` module and add some checks (directory exists, permissions). 
+ dicom_archive_dir_path = get_config_with_setting_name(env.db, 'tarchiveLibraryDir').value + if dicom_archive_dir_path is None: + log_error_exit(env, "No value found for configuration setting 'tarchiveLibraryDir'.") + + if dicom_summary.info.scan_date is None: + log_warning(env, "No DICOM scan date found in the DICOM files.") + + dicom_archive_rel_path = f'DCM_{dicom_study_name}.tar' + else: + log(env, f"Found DICOM scan date: {dicom_summary.info.scan_date}") + + scan_date_string = lib.import_dicom_study.text.write_date(dicom_summary.info.scan_date) + dicom_archive_rel_path = os.path.join( + str(dicom_summary.info.scan_date.year), + f'DCM_{scan_date_string}_{dicom_study_name}.tar', + ) + + dicom_archive_year_dir_path = os.path.join(dicom_archive_dir_path, str(dicom_summary.info.scan_date.year)) + if not os.path.exists(dicom_archive_year_dir_path): + log(env, f"Creating year directory '{dicom_archive_year_dir_path}'...") + os.mkdir(dicom_archive_year_dir_path) + + dicom_archive_path = os.path.join(dicom_archive_dir_path, dicom_archive_rel_path) + + if os.path.exists(dicom_archive_path): + if not args.overwrite: + log_error_exit( + env, + f"File '{dicom_archive_path}' already exists. 
Use argument '--overwrite' to overwrite it", + ) + + log_warning(env, f"Overwriting file '{dicom_archive_path}'...") + + os.remove(dicom_archive_path) + + with tempfile.TemporaryDirectory() as tmp_dir_path: + tar_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.tar') + zip_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.tar.gz') + summary_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.meta') + log_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.log') + + log(env, "Copying the DICOM files into a new tar archive...") + + with tarfile.open(tar_path, 'w') as tar: + for file_rel_path in iter_all_dir_files(args.source): + file_path = os.path.join(args.source, file_rel_path) + file_tar_path = os.path.join(os.path.basename(args.source), file_rel_path) + tar.add(file_path, arcname=file_tar_path) + + log(env, "Calculating the tar archive MD5 sum...") + + tar_md5_sum = lib.import_dicom_study.text.compute_md5_hash_with_name(tar_path) + + log(env, "Zipping the tar archive... (may take a long time)") + + with open(tar_path, 'rb') as tar: + # 6 is the default compression level of the `tar` command, Python's + # default is 9, which is more compressed but also a lot slower. 
+ with gzip.open(zip_path, 'wb', compresslevel=6) as zip: + shutil.copyfileobj(tar, zip) + + log(env, "Calculating the zipped tar archive MD5 sum...") + + zip_md5_sum = lib.import_dicom_study.text.compute_md5_hash_with_name(zip_path) + + log(env, "Creating DICOM study import log...") + + dicom_import_log = make_dicom_study_import_log(args.source, dicom_archive_path, tar_md5_sum, zip_md5_sum) + + if args.verbose: + dicom_import_log_string = write_dicom_study_import_log_to_string(dicom_import_log) + log(env, f"The archive will be created with the following arguments:\n{dicom_import_log_string}") + + log(env, "Writing DICOM study summary file...") + + write_dicom_study_summary_to_file(dicom_summary, summary_path) + + log(env, "Writing DICOM study import log file...") + + write_dicom_study_import_log_to_file(dicom_import_log, log_path) + + log(env, 'Copying files into the final DICOM study archive...') + + with tarfile.open(dicom_archive_path, 'w') as tar: + tar.add(zip_path, os.path.basename(zip_path)) + tar.add(summary_path, os.path.basename(summary_path)) + tar.add(log_path, os.path.basename(log_path)) + + log(env, "Calculating final DICOM study archive MD5 sum...") + + dicom_import_log.archive_md5_sum = lib.import_dicom_study.text.compute_md5_hash_with_name( + dicom_import_log.target_path + ) + + if args.insert: + log(env, "Inserting the DICOM study in the LORIS database...") + + dicom_archive = insert_dicom_archive(env.db, dicom_summary, dicom_import_log, dicom_archive_rel_path) + + if args.update: + log(env, "Updating the DICOM study in the LORIS database...") + + # Safe because we previously checked that the DICOM study is in LORIS. + dicom_archive = cast(DbDicomArchive, dicom_archive) + + update_dicom_archive(env.db, dicom_archive, dicom_summary, dicom_import_log, dicom_archive_rel_path) + + if session is not None: + log(env, "Updating the DICOM study session...") + + # Safe because we previously checked that the DICOM study is in LORIS. 
+        dicom_archive = cast(DbDicomArchive, dicom_archive)
+        dicom_archive.session = session
+        env.db.commit()
+
+    log(env, "Success !")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/scripts/summarize_dicom_study.py b/python/scripts/summarize_dicom_study.py
new file mode 100755
index 000000000..a0fffbb32
--- /dev/null
+++ b/python/scripts/summarize_dicom_study.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+import argparse
+import sys
+from dataclasses import dataclass
+
+import lib.exitcode
+from lib.import_dicom_study.summary_get import get_dicom_study_summary
+from lib.import_dicom_study.summary_write import write_dicom_study_summary
+
+parser = argparse.ArgumentParser(description=(
+    'Read a DICOM directory and print the DICOM summary of this directory '
+    'in the console.'
+    ))
+
+parser.add_argument(
+    'directory',
+    help='The DICOM directory')
+
+parser.add_argument(
+    '--verbose',
+    action='store_true',
+    help='Set the script to be verbose')
+
+
+@dataclass
+class Args:
+    directory: str
+    verbose: bool
+
+
+def main() -> None:
+    parsed_args = parser.parse_args()
+    args = Args(parsed_args.directory, parsed_args.verbose)
+
+    try:
+        summary = get_dicom_study_summary(args.directory, args.verbose)
+    except Exception as e:
+        print(
+            (
+                f"ERROR: Cannot create a summary for the directory '{args.directory}'.\n"
+                f"Exception message:\n{e}"
+            ),
+            file=sys.stderr
+        )
+        exit(lib.exitcode.INVALID_DICOM)
+
+    print(write_dicom_study_summary(summary))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/tests/unit/db/query/test_dicom_archive.py b/python/tests/unit/db/query/test_dicom_archive.py
index 23dfeb5b2..102790257 100644
--- a/python/tests/unit/db/query/test_dicom_archive.py
+++ b/python/tests/unit/db/query/test_dicom_archive.py
@@ -7,11 +7,7 @@
 from lib.db.models.dicom_archive import DbDicomArchive
 from lib.db.models.dicom_archive_file import DbDicomArchiveFile
 from lib.db.models.dicom_archive_series import DbDicomArchiveSeries
-from 
lib.db.queries.dicom_archive import ( - delete_dicom_archive_file_series, - get_dicom_archive_series_with_file_info, - try_get_dicom_archive_with_study_uid, -) +from lib.db.queries.dicom_archive import delete_dicom_archive_file_series, try_get_dicom_archive_with_study_uid from tests.util.database import create_test_database @@ -162,15 +158,3 @@ def test_delete_dicom_archive_file_series(setup: Setup): assert setup.db.execute(select(DbDicomArchiveSeries) .where(DbDicomArchiveSeries.archive_id == setup.dicom_archive.id)).first() is None - - -def test_get_dicom_archive_series_with_file_info(setup: Setup): - dicom_archive_series = get_dicom_archive_series_with_file_info( - setup.db, - '1.3.12.2.11.11.11.999.0.0', - 1, - 100, - 'ep_b100', - ) - - assert dicom_archive_series is setup.dicom_archive_series