From 01063066590eb97dd9287da7a272f59c817ad5a1 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Thu, 8 Aug 2024 13:12:55 -0400 Subject: [PATCH 1/3] improve subjects determination and validation --- python/lib/database_lib/candidate_db.py | 23 ++-- python/lib/database_lib/visit_windows.py | 10 +- .../base_pipeline.py | 64 ++++------- .../nifti_insertion_pipeline.py | 39 +++++-- .../exception/determine_subject_exception.py | 10 ++ .../exception/validate_subject_exception.py | 10 ++ python/lib/imaging.py | 101 +++++------------- python/lib/validate_subject_ids.py | 69 ++++++++++++ 8 files changed, 177 insertions(+), 149 deletions(-) create mode 100644 python/lib/exception/determine_subject_exception.py create mode 100644 python/lib/exception/validate_subject_exception.py create mode 100644 python/lib/validate_subject_ids.py diff --git a/python/lib/database_lib/candidate_db.py b/python/lib/database_lib/candidate_db.py index bbe4e92d3..791b57025 100644 --- a/python/lib/database_lib/candidate_db.py +++ b/python/lib/database_lib/candidate_db.py @@ -35,23 +35,16 @@ def __init__(self, db, verbose): self.db = db self.verbose = verbose - def check_candid_pscid_combination(self, psc_id, cand_id): + def get_candidate_psc_id(self, cand_id: str | int) -> str | None: """ - Checks whether the PSCID/CandID combination corresponds to a valid candidate in the `candidate` table. - - :param psc_id: PSCID of the candidate - :type psc_id: str - :param cand_id: CandID of the candidate - :type cand_id: int - - :returns: the valid CandID and PSCID if the combination corresponds to a candidate, None otherwise + Return a candidate PSCID and based on its CandID, or `None` if no candidate is found in + the database. """ - query = "SELECT c1.CandID, c2.PSCID AS PSCID " \ - " FROM candidate c1 " \ - " LEFT JOIN candidate c2 ON (c1.CandID=c2.CandID AND c2.PSCID = %s) " \ - " WHERE c1.CandID = %s" + query = 'SELECT PSCID ' \ + 'FROM candidate ' \ + 'WHERE CandID = %s' - results = self.db.pselect(query=query, args=(psc_id, cand_id)) + results = self.db.pselect(query, args=[cand_id]) - return results if results else None + return results[0]['PSCID'] if results else None diff --git a/python/lib/database_lib/visit_windows.py b/python/lib/database_lib/visit_windows.py index 14e1636ee..058798c8d 100644 --- a/python/lib/database_lib/visit_windows.py +++ b/python/lib/database_lib/visit_windows.py @@ -35,15 +35,11 @@ def __init__(self, db, verbose): self.db = db self.verbose = verbose - def check_visit_label_exits(self, visit_label): + def check_visit_label_exists(self, visit_label: str) -> bool: """ - Returns a list of dictionaries storing the list of Visit_label present in the Visit_Windows table. - - :return: list of dictionaries with the list of Visit_label present in the Visit_Windows table - :rtype: list + Check if a visit label exists in the Visit_Windows database table. """ query = 'SELECT Visit_label FROM Visit_Windows WHERE BINARY Visit_label = %s' results = self.db.pselect(query=query, args=(visit_label,)) - - return results if results else None + return bool(results) diff --git a/python/lib/dcm2bids_imaging_pipeline_lib/base_pipeline.py b/python/lib/dcm2bids_imaging_pipeline_lib/base_pipeline.py index 01176531c..6e0a4b1e7 100644 --- a/python/lib/dcm2bids_imaging_pipeline_lib/base_pipeline.py +++ b/python/lib/dcm2bids_imaging_pipeline_lib/base_pipeline.py @@ -3,18 +3,19 @@ import shutil import sys +from lib.exception.determine_subject_exception import DetermineSubjectException +from lib.exception.validate_subject_exception import ValidateSubjectException import lib.exitcode import lib.utilities -from lib.database_lib.candidate_db import CandidateDB from lib.database_lib.config import Config -from lib.database_lib.visit_windows import VisitWindows from lib.database import Database from lib.dicom_archive import DicomArchive from lib.imaging import Imaging from lib.log import Log from lib.imaging_upload import ImagingUpload from lib.session import Session +from lib.validate_subject_ids import validate_subject_ids class BasePipeline: @@ -105,10 +106,11 @@ def __init__(self, loris_getopt_obj, script_name): # Grep scanner information based on what is in the DICOM headers # --------------------------------------------------------------------------------- if self.dicom_archive_obj.tarchive_info_dict.keys(): - self.subject_id_dict = self.imaging_obj.determine_subject_ids(self.dicom_archive_obj.tarchive_info_dict) - if 'error_message' in self.subject_id_dict: + try: + self.subject_id_dict = self.imaging_obj.determine_subject_ids(self.dicom_archive_obj.tarchive_info_dict) + except DetermineSubjectException as exception: self.log_error_and_exit( - self.subject_id_dict['error_message'], + exception.message, lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE, is_error="Y", is_verbose="N" @@ -235,52 +237,30 @@ def validate_subject_ids(self): """ Ensure that the subject PSCID/CandID corresponds to a single candidate in the candidate table and that the visit label can be found in the Visit_Windows table. If those - conditions are not fulfilled, then a 'CandMismatchError' with the validation error - is added to the subject IDs dictionary (subject_id_dict). + conditions are not fulfilled. """ - psc_id = self.subject_id_dict["PSCID"] - cand_id = self.subject_id_dict["CandID"] - visit_label = self.subject_id_dict["visitLabel"] - is_phantom = self.subject_id_dict["isPhantom"] - # no further checking if the subject is phantom - if is_phantom: + if self.subject_id_dict['isPhantom']: return - # check that the CandID and PSCID are valid - candidate_db_obj = CandidateDB(self.db, self.verbose) - results = candidate_db_obj.check_candid_pscid_combination(psc_id, cand_id) - if not results: - # if no rows were returned, then the CandID is not valid - self.subject_id_dict["message"] = f"=> Could not find candidate with CandID={cand_id} in the database" - self.subject_id_dict["CandMismatchError"] = "CandID does not exist" - elif not results[0]["PSCID"]: - # if no PSCID returned in the row, then PSCID and CandID do not match - self.subject_id_dict["message"] = "=> PSCID and CandID of the image mismatch" - self.subject_id_dict["CandMismatchError"] = self.subject_id_dict['message'] - - # check if visit label is valid - visit_windows_obj = VisitWindows(self.db, self.verbose) - results = visit_windows_obj.check_visit_label_exits(visit_label) - if results: - self.subject_id_dict["message"] = f"Found visit label {visit_label} in Visit_Windows" - elif self.subject_id_dict["createVisitLabel"]: - self.subject_id_dict["message"] = f"Will create visit label {visit_label} in Visit_Windows" - else: - self.subject_id_dict["message"] = f"Visit Label {visit_label} does not exist in Visit_Windows" - self.subject_id_dict["CandMismatchError"] = self.subject_id_dict['message'] + try: + validate_subject_ids( + self.db, + self.verbose, + self.subject_id_dict['PSCID'], + self.subject_id_dict['CandID'], + self.subject_id_dict['visitLabel'], + bool(self.subject_id_dict['createVisitLabel']), + ) - if "CandMismatchError" in self.subject_id_dict.keys(): - # if there is a candidate mismatch error, log it but do not exit. It will be logged later in SQL table - self.log_info(self.subject_id_dict["CandMismatchError"], is_error="Y", is_verbose="N") self.imaging_upload_obj.update_mri_upload( - upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('0',) + upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('1',) ) - else: - self.log_info(self.subject_id_dict["message"], is_error="N", is_verbose="Y") + except ValidateSubjectException as exception: + self.log_info(exception.message, is_error='Y', is_verbose='N') self.imaging_upload_obj.update_mri_upload( - upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('1',) + upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('0',) ) def log_error_and_exit(self, message, exit_code, is_error, is_verbose): diff --git a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py index 5e4315076..36e79ccf5 100644 --- a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py +++ b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py @@ -1,6 +1,8 @@ import datetime import getpass import json +from lib.exception.determine_subject_exception import DetermineSubjectException +from lib.exception.validate_subject_exception import ValidateSubjectException import lib.exitcode import lib.utilities as utilities import os @@ -9,6 +11,7 @@ import sys from lib.dcm2bids_imaging_pipeline_lib.base_pipeline import BasePipeline +from lib.validate_subject_ids import validate_subject_ids __license__ = "GPLv3" @@ -86,19 +89,30 @@ def __init__(self, loris_getopt_obj, script_name): ) else: self._determine_subject_ids_based_on_json_patient_name() - self.validate_subject_ids() - if "CandMismatchError" in self.subject_id_dict.keys(): + + try: + validate_subject_ids( + self.db, + self.verbose, + self.subject_id_dict['PSCID'], + self.subject_id_dict['CandID'], + self.subject_id_dict['visitLabel'], + bool(self.subject_id_dict['createVisitLabel']), + ) + except ValidateSubjectException as exception: self.imaging_obj.insert_mri_candidate_errors( - self.dicom_archive_obj.tarchive_info_dict["PatientName"], - self.dicom_archive_obj.tarchive_info_dict["TarchiveID"], + self.dicom_archive_obj.tarchive_info_dict['PatientName'], + self.dicom_archive_obj.tarchive_info_dict['TarchiveID'], self.json_file_dict, self.nifti_path, - self.subject_id_dict["CandMismatchError"] + exception.message, ) + if self.nifti_s3_url: # push candidate errors to S3 if provided file was on S3 self._run_push_to_s3_pipeline() + self.log_error_and_exit( - self.subject_id_dict['CandMismatchError'], lib.exitcode.CANDIDATE_MISMATCH, is_error="Y", is_verbose="N" + exception.message, lib.exitcode.CANDIDATE_MISMATCH, is_error='Y', is_verbose='N' ) # --------------------------------------------------------------------------------------------- @@ -312,11 +326,14 @@ def _determine_subject_ids_based_on_json_patient_name(self): dicom_value = self.json_file_dict[dicom_header] try: - self.subject_id_dict = self.config_file.get_subject_ids(self.db, dicom_value, None) - self.subject_id_dict["PatientName"] = dicom_value - except AttributeError: - message = "Config file does not contain a get_subject_ids routine. Upload will exit now." - self.log_error_and_exit(message, lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE, is_error="Y", is_verbose="N") + self.subject_id_dict = self.imaging_obj.determine_subject_ids(dicom_value) + except DetermineSubjectException as exception: + self.log_error_and_exit( + exception.message, + lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE, + is_error='Y', + is_verbose='N' + ) self.log_info("Determined subject IDs based on PatientName stored in JSON file", is_error="N", is_verbose="Y") diff --git a/python/lib/exception/determine_subject_exception.py b/python/lib/exception/determine_subject_exception.py new file mode 100644 index 000000000..dc00009b9 --- /dev/null +++ b/python/lib/exception/determine_subject_exception.py @@ -0,0 +1,10 @@ +class DetermineSubjectException(Exception): + """ + Exception raised if some subject IDs cannot be determined using the config file. + """ + + message: str + + def __init__(self, message: str): + super().__init__(message) + self.message = message diff --git a/python/lib/exception/validate_subject_exception.py b/python/lib/exception/validate_subject_exception.py new file mode 100644 index 000000000..48ee97a5e --- /dev/null +++ b/python/lib/exception/validate_subject_exception.py @@ -0,0 +1,10 @@ +class ValidateSubjectException(Exception): + """ + Exception raised if some subject IDs validation fails. + """ + + message: str + + def __init__(self, message: str): + super().__init__(message) + self.message = message diff --git a/python/lib/imaging.py b/python/lib/imaging.py index 4a09b7950..bb513e55a 100644 --- a/python/lib/imaging.py +++ b/python/lib/imaging.py @@ -3,6 +3,7 @@ import os import datetime import json +from typing import Any, Optional import lib.utilities as utilities import nibabel as nib import re @@ -21,6 +22,7 @@ from lib.database_lib.mri_violations_log import MriViolationsLog from lib.database_lib.parameter_file import ParameterFile from lib.database_lib.parameter_type import ParameterType +from lib.exception.determine_subject_exception import DetermineSubjectException __license__ = "GPLv3" @@ -510,93 +512,44 @@ def grep_cand_id_from_file_id(self, file_id): # return the result return results[0]['CandID'] if results else None - def determine_subject_ids(self, tarchive_info_dict, scanner_id=None): + def determine_subject_ids(self, tarchive_info_dict, scanner_id: Optional[int] = None) -> dict[str, Any]: """ Determine subject IDs based on the DICOM header specified by the lookupCenterNameUsing - config setting. This function will call a function in the config file that can be + config setting. This function will call a function in the configuration file that can be customized for each project. - :param tarchive_info_dict: dictionary with information about the DICOM archive queried - from the tarchive table - :type tarchive_info_dict: dict - :param scanner_id : ScannerID - :type scanner_id : int or None + :param tarchive_info_dict : Dictionary with information about the DICOM archive queried + from the tarchive table + :param scanner_id : The ScannerID if there is one - :return subject_id_dict: dictionary with subject IDs and visit label or error status - :rtype subject_id_dict: dict + :raises DetermineSubjectException: Exception if the subject IDs cannot be determined from + the configuration file. + + :return: Dictionary with subject IDs and visit label. """ config_obj = Config(self.db, self.verbose) dicom_header = config_obj.get_config('lookupCenterNameUsing') - dicom_value = tarchive_info_dict[dicom_header] + subject_name = tarchive_info_dict[dicom_header] try: - subject_id_dict = self.config_file.get_subject_ids(self.db, dicom_value, scanner_id) - subject_id_dict['PatientName'] = dicom_value + subject_id_dict = self.config_file.get_subject_ids(self.db, subject_name, scanner_id) except AttributeError: - message = 'Config file does not contain a get_subject_ids routine. Upload will exit now.' - return {'error_message': message} - - return subject_id_dict - - def validate_subject_ids(self, subject_id_dict): - """ - Ensure that the subject PSCID/CandID corresponds to a single candidate in the candidate - table and that the visit label can be found in the Visit_Windows table. If those - conditions are not fulfilled, then a 'CandMismatchError' with the validation error - is added to the subject IDs dictionary (subject_id_dict). - - :param subject_id_dict : dictionary with subject IDs and visit label - :type subject_id_dict : dict - - :return: True if the subject IDs are valid, False otherwise - :rtype: bool - """ - - psc_id = subject_id_dict['PSCID'] - cand_id = subject_id_dict['CandID'] - visit_label = subject_id_dict['visitLabel'] - is_phantom = subject_id_dict['isPhantom'] - - # no further checking if the subject is phantom - if is_phantom: - return True + raise DetermineSubjectException( + 'Config file does not contain a `get_subject_ids` function. Upload will exit now.' + ) - # check that the CandID and PSCID are valid - # TODO use candidate_db class for that for bids_import - query = 'SELECT c1.CandID, c2.PSCID AS PSCID ' \ - ' FROM candidate c1 ' \ - ' LEFT JOIN candidate c2 ON (c1.CandID=c2.CandID AND c2.PSCID = %s) ' \ - ' WHERE c1.CandID = %s' - results = self.db.pselect(query=query, args=(psc_id, cand_id)) - if not results: - # if no rows were returned, then the CandID is not valid - subject_id_dict['message'] = '=> Could not find candidate with CandID=' + cand_id \ - + ' in the database' - subject_id_dict['CandMismatchError'] = 'CandID does not exist' - return False - elif not results[0]['PSCID']: - # if no PSCID returned in the row, then PSCID and CandID do not match - subject_id_dict['message'] = '=> PSCID and CandID of the image mismatch' - # Message is undefined - subject_id_dict['CandMismatchError'] = subject_id_dict['message'] - return False + if subject_id_dict == {}: + raise DetermineSubjectException( + f'Cannot get subject IDs for subject \'{subject_name}\'.\n' + 'Possible causes:\n' + '- The subject name is not correctly formatted (should usually be \'PSCID_CandID_VisitLabel\').\n' + '- The function `get_subject_ids` in the Python configuration file is not properly defined.\n' + '- Other project specific reason.' + ) - # check if visit label is valid - # TODO use visit_windows class for that for bids_import - query = 'SELECT Visit_label FROM Visit_Windows WHERE BINARY Visit_label = %s' - results = self.db.pselect(query=query, args=(visit_label,)) - if results: - subject_id_dict['message'] = f'=> Found visit label {visit_label} in Visit_Windows' - return True - elif subject_id_dict['createVisitLabel']: - subject_id_dict['message'] = f'=> Will create visit label {visit_label} in Visit_Windows' - return True - else: - subject_id_dict['message'] = f'=> Visit Label {visit_label} does not exist in Visit_Windows' - # Message is undefined - subject_id_dict['CandMismatchError'] = subject_id_dict['message'] - return False + subject_id_dict['PatientName'] = subject_name + return subject_id_dict def map_bids_param_to_loris_param(self, file_parameters): """ @@ -1110,7 +1063,7 @@ def get_list_of_files_sorted_by_acq_time(self, files_list): sorted_files_list = sorted(new_files_list, key=lambda x: x['acq_time']) except TypeError: return None - + return sorted_files_list def modify_fmap_json_file_to_write_intended_for(self, sorted_fmap_files_list, s3_obj, tmp_dir): diff --git a/python/lib/validate_subject_ids.py b/python/lib/validate_subject_ids.py new file mode 100644 index 000000000..b2de3d820 --- /dev/null +++ b/python/lib/validate_subject_ids.py @@ -0,0 +1,69 @@ +from dataclasses import dataclass +from lib.database import Database +from lib.database_lib.candidate_db import CandidateDB +from lib.database_lib.visit_windows import VisitWindows +from lib.exception.validate_subject_exception import ValidateSubjectException + + +# Utility class + +@dataclass +class Subject: + """ + Wrapper for the properties of a subject. + """ + + psc_id: str + cand_id: str + visit_label: str + + def get_name(self): + return f'{self.psc_id}_{self.cand_id}_{self.visit_label}' + + +# Main validation functions + +def validate_subject_ids( + db: Database, + verbose: bool, + psc_id: str, + cand_id: str, + visit_label: str, + create_visit: bool +): + """ + Validate a subject's information against the database from its parts (PSCID, CandID, VisitLabel). + Raise an exception if an error is found, or return `None` otherwise. + """ + + subject = Subject(psc_id, cand_id, visit_label) + validate_subject(db, verbose, subject, create_visit) + + +def validate_subject(db: Database, verbose: bool, subject: Subject, create_visit: bool): + candidate_db = CandidateDB(db, verbose) + candidate_psc_id = candidate_db.get_candidate_psc_id(subject.cand_id) + if candidate_psc_id is None: + validate_subject_error( + subject, + f'Candidate (CandID = \'{subject.cand_id}\') does not exist in the database.' + ) + + if candidate_psc_id != subject.psc_id: + validate_subject_error( + subject, + f'Candidate (CandID = \'{subject.cand_id}\') PSCID does not match the subject PSCID.\n' + f'Candidate PSCID = \'{candidate_psc_id}\', Subject PSCID = \'{subject.psc_id}\'' + ) + + visit_window_db = VisitWindows(db, verbose) + visit_window_exists = visit_window_db.check_visit_label_exists(subject.visit_label) + if not visit_window_exists and not create_visit: + validate_subject_error( + subject, + f'Visit label \'{subject.visit_label}\' does not exist in the database (table `Visit_Windows`).' + ) + + +def validate_subject_error(subject: Subject, message: str): + raise ValidateSubjectException(f'Validation error for subject \'{subject.get_name()}\'.\n{message}') From d7360f47d3b96d15498ebd8b4331803a924128e0 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Thu, 8 Aug 2024 13:27:32 -0400 Subject: [PATCH 2/3] use tuple for database query arguments --- python/lib/database_lib/candidate_db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/database_lib/candidate_db.py b/python/lib/database_lib/candidate_db.py index 791b57025..a81ee194a 100644 --- a/python/lib/database_lib/candidate_db.py +++ b/python/lib/database_lib/candidate_db.py @@ -45,6 +45,6 @@ def get_candidate_psc_id(self, cand_id: str | int) -> str | None: 'FROM candidate ' \ 'WHERE CandID = %s' - results = self.db.pselect(query, args=[cand_id]) + results = self.db.pselect(query, args=(cand_id,)) return results[0]['PSCID'] if results else None From 016c65b91a626eb86e155a6843a64331c7acd7e3 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Wed, 14 Aug 2024 11:36:09 -0400 Subject: [PATCH 3/3] query on single line --- python/lib/database_lib/candidate_db.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/lib/database_lib/candidate_db.py b/python/lib/database_lib/candidate_db.py index a81ee194a..eb9eba2b3 100644 --- a/python/lib/database_lib/candidate_db.py +++ b/python/lib/database_lib/candidate_db.py @@ -41,9 +41,7 @@ def get_candidate_psc_id(self, cand_id: str | int) -> str | None: the database. """ - query = 'SELECT PSCID ' \ - 'FROM candidate ' \ - 'WHERE CandID = %s' + query = 'SELECT PSCID FROM candidate WHERE CandID = %s' results = self.db.pselect(query, args=(cand_id,))