Skip to content

Improve Python subject determination and validation error handling #1146

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 8 additions & 15 deletions python/lib/database_lib/candidate_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,16 @@ def __init__(self, db, verbose):
self.db = db
self.verbose = verbose

def check_candid_pscid_combination(self, psc_id, cand_id):
def get_candidate_psc_id(self, cand_id: str | int) -> str | None:
"""
Checks whether the PSCID/CandID combination corresponds to a valid candidate in the `candidate` table.

:param psc_id: PSCID of the candidate
:type psc_id: str
:param cand_id: CandID of the candidate
:type cand_id: int

:returns: the valid CandID and PSCID if the combination corresponds to a candidate, None otherwise
Return a candidate PSCID and based on its CandID, or `None` if no candidate is found in
the database.
"""

query = "SELECT c1.CandID, c2.PSCID AS PSCID " \
" FROM candidate c1 " \
" LEFT JOIN candidate c2 ON (c1.CandID=c2.CandID AND c2.PSCID = %s) " \
" WHERE c1.CandID = %s"
query = 'SELECT PSCID ' \
'FROM candidate ' \
'WHERE CandID = %s'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That could probably be just in one line given how short the query is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine by me, done.


results = self.db.pselect(query=query, args=(psc_id, cand_id))
results = self.db.pselect(query, args=(cand_id,))

return results if results else None
return results[0]['PSCID'] if results else None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a comment in the old PR, but I still don't see how a join is necessary here. If I query the candidate by CandID, and that the PSCID of the candidate matches the PSCID of the file name, it is a sufficient proof that everying is fine to me.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough. That select statement is derived from the perl code actually. @nicolasbrossard any reason why that query is used instead of just selecting the PSCID based on a CandID (and later on checking that the PSCID in the filename is the same as the PSCID in the database). I have a feeling there was a reason for it but my memory fails me. Hopefully, yours is in a better shape ;)

10 changes: 3 additions & 7 deletions python/lib/database_lib/visit_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,11 @@ def __init__(self, db, verbose):
self.db = db
self.verbose = verbose

def check_visit_label_exits(self, visit_label):
def check_visit_label_exists(self, visit_label: str) -> bool:
"""
Returns a list of dictionaries storing the list of Visit_label present in the Visit_Windows table.

:return: list of dictionaries with the list of Visit_label present in the Visit_Windows table
:rtype: list
Check if a visit label exists in the Visit_Windows database table.
"""

query = 'SELECT Visit_label FROM Visit_Windows WHERE BINARY Visit_label = %s'
results = self.db.pselect(query=query, args=(visit_label,))

return results if results else None
return bool(results)
64 changes: 22 additions & 42 deletions python/lib/dcm2bids_imaging_pipeline_lib/base_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
import shutil
import sys

from lib.exception.determine_subject_exception import DetermineSubjectException
from lib.exception.validate_subject_exception import ValidateSubjectException
import lib.exitcode
import lib.utilities

from lib.database_lib.candidate_db import CandidateDB
from lib.database_lib.config import Config
from lib.database_lib.visit_windows import VisitWindows
from lib.database import Database
from lib.dicom_archive import DicomArchive
from lib.imaging import Imaging
from lib.log import Log
from lib.imaging_upload import ImagingUpload
from lib.session import Session
from lib.validate_subject_ids import validate_subject_ids


class BasePipeline:
Expand Down Expand Up @@ -105,10 +106,11 @@ def __init__(self, loris_getopt_obj, script_name):
# Grep scanner information based on what is in the DICOM headers
# ---------------------------------------------------------------------------------
if self.dicom_archive_obj.tarchive_info_dict.keys():
self.subject_id_dict = self.imaging_obj.determine_subject_ids(self.dicom_archive_obj.tarchive_info_dict)
if 'error_message' in self.subject_id_dict:
try:
self.subject_id_dict = self.imaging_obj.determine_subject_ids(self.dicom_archive_obj.tarchive_info_dict)
except DetermineSubjectException as exception:
self.log_error_and_exit(
self.subject_id_dict['error_message'],
exception.message,
lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE,
is_error="Y",
is_verbose="N"
Expand Down Expand Up @@ -235,52 +237,30 @@ def validate_subject_ids(self):
"""
Ensure that the subject PSCID/CandID corresponds to a single candidate in the candidate
table and that the visit label can be found in the Visit_Windows table. If those
conditions are not fulfilled, then a 'CandMismatchError' with the validation error
is added to the subject IDs dictionary (subject_id_dict).
conditions are not fulfilled.
"""

psc_id = self.subject_id_dict["PSCID"]
cand_id = self.subject_id_dict["CandID"]
visit_label = self.subject_id_dict["visitLabel"]
is_phantom = self.subject_id_dict["isPhantom"]

# no further checking if the subject is phantom
if is_phantom:
if self.subject_id_dict['isPhantom']:
return

# check that the CandID and PSCID are valid
candidate_db_obj = CandidateDB(self.db, self.verbose)
results = candidate_db_obj.check_candid_pscid_combination(psc_id, cand_id)
if not results:
# if no rows were returned, then the CandID is not valid
self.subject_id_dict["message"] = f"=> Could not find candidate with CandID={cand_id} in the database"
self.subject_id_dict["CandMismatchError"] = "CandID does not exist"
elif not results[0]["PSCID"]:
# if no PSCID returned in the row, then PSCID and CandID do not match
self.subject_id_dict["message"] = "=> PSCID and CandID of the image mismatch"
self.subject_id_dict["CandMismatchError"] = self.subject_id_dict['message']

# check if visit label is valid
visit_windows_obj = VisitWindows(self.db, self.verbose)
results = visit_windows_obj.check_visit_label_exits(visit_label)
if results:
self.subject_id_dict["message"] = f"Found visit label {visit_label} in Visit_Windows"
elif self.subject_id_dict["createVisitLabel"]:
self.subject_id_dict["message"] = f"Will create visit label {visit_label} in Visit_Windows"
else:
self.subject_id_dict["message"] = f"Visit Label {visit_label} does not exist in Visit_Windows"
self.subject_id_dict["CandMismatchError"] = self.subject_id_dict['message']
try:
validate_subject_ids(
self.db,
self.verbose,
self.subject_id_dict['PSCID'],
self.subject_id_dict['CandID'],
self.subject_id_dict['visitLabel'],
bool(self.subject_id_dict['createVisitLabel']),
)

if "CandMismatchError" in self.subject_id_dict.keys():
# if there is a candidate mismatch error, log it but do not exit. It will be logged later in SQL table
self.log_info(self.subject_id_dict["CandMismatchError"], is_error="Y", is_verbose="N")
self.imaging_upload_obj.update_mri_upload(
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('0',)
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('1',)
)
else:
self.log_info(self.subject_id_dict["message"], is_error="N", is_verbose="Y")
except ValidateSubjectException as exception:
self.log_info(exception.message, is_error='Y', is_verbose='N')
self.imaging_upload_obj.update_mri_upload(
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('1',)
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('0',)
)

def log_error_and_exit(self, message, exit_code, is_error, is_verbose):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import datetime
import getpass
import json
from lib.exception.determine_subject_exception import DetermineSubjectException
from lib.exception.validate_subject_exception import ValidateSubjectException
import lib.exitcode
import lib.utilities as utilities
import os
Expand All @@ -9,6 +11,7 @@
import sys

from lib.dcm2bids_imaging_pipeline_lib.base_pipeline import BasePipeline
from lib.validate_subject_ids import validate_subject_ids

__license__ = "GPLv3"

Expand Down Expand Up @@ -86,19 +89,30 @@ def __init__(self, loris_getopt_obj, script_name):
)
else:
self._determine_subject_ids_based_on_json_patient_name()
self.validate_subject_ids()
if "CandMismatchError" in self.subject_id_dict.keys():

try:
validate_subject_ids(
self.db,
self.verbose,
self.subject_id_dict['PSCID'],
self.subject_id_dict['CandID'],
self.subject_id_dict['visitLabel'],
bool(self.subject_id_dict['createVisitLabel']),
)
except ValidateSubjectException as exception:
self.imaging_obj.insert_mri_candidate_errors(
self.dicom_archive_obj.tarchive_info_dict["PatientName"],
self.dicom_archive_obj.tarchive_info_dict["TarchiveID"],
self.dicom_archive_obj.tarchive_info_dict['PatientName'],
self.dicom_archive_obj.tarchive_info_dict['TarchiveID'],
self.json_file_dict,
self.nifti_path,
self.subject_id_dict["CandMismatchError"]
exception.message,
)

if self.nifti_s3_url: # push candidate errors to S3 if provided file was on S3
self._run_push_to_s3_pipeline()

self.log_error_and_exit(
self.subject_id_dict['CandMismatchError'], lib.exitcode.CANDIDATE_MISMATCH, is_error="Y", is_verbose="N"
exception.message, lib.exitcode.CANDIDATE_MISMATCH, is_error='Y', is_verbose='N'
)

# ---------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -312,11 +326,14 @@ def _determine_subject_ids_based_on_json_patient_name(self):
dicom_value = self.json_file_dict[dicom_header]

try:
self.subject_id_dict = self.config_file.get_subject_ids(self.db, dicom_value, None)
self.subject_id_dict["PatientName"] = dicom_value
except AttributeError:
message = "Config file does not contain a get_subject_ids routine. Upload will exit now."
self.log_error_and_exit(message, lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE, is_error="Y", is_verbose="N")
self.subject_id_dict = self.imaging_obj.determine_subject_ids(dicom_value)
except DetermineSubjectException as exception:
self.log_error_and_exit(
exception.message,
lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE,
is_error='Y',
is_verbose='N'
)

self.log_info("Determined subject IDs based on PatientName stored in JSON file", is_error="N", is_verbose="Y")

Expand Down
10 changes: 10 additions & 0 deletions python/lib/exception/determine_subject_exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class DetermineSubjectException(Exception):
"""
Exception raised if some subject IDs cannot be determined using the config file.
"""

message: str

def __init__(self, message: str):
super().__init__(message)
self.message = message
10 changes: 10 additions & 0 deletions python/lib/exception/validate_subject_exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class ValidateSubjectException(Exception):
"""
Exception raised if some subject IDs validation fails.
"""

message: str

def __init__(self, message: str):
super().__init__(message)
self.message = message
101 changes: 27 additions & 74 deletions python/lib/imaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import datetime
import json
from typing import Any, Optional
import lib.utilities as utilities
import nibabel as nib
import re
Expand All @@ -21,6 +22,7 @@
from lib.database_lib.mri_violations_log import MriViolationsLog
from lib.database_lib.parameter_file import ParameterFile
from lib.database_lib.parameter_type import ParameterType
from lib.exception.determine_subject_exception import DetermineSubjectException

__license__ = "GPLv3"

Expand Down Expand Up @@ -510,93 +512,44 @@ def grep_cand_id_from_file_id(self, file_id):
# return the result
return results[0]['CandID'] if results else None

def determine_subject_ids(self, tarchive_info_dict, scanner_id=None):
def determine_subject_ids(self, tarchive_info_dict, scanner_id: Optional[int] = None) -> dict[str, Any]:
"""
Determine subject IDs based on the DICOM header specified by the lookupCenterNameUsing
config setting. This function will call a function in the config file that can be
config setting. This function will call a function in the configuration file that can be
customized for each project.

:param tarchive_info_dict: dictionary with information about the DICOM archive queried
from the tarchive table
:type tarchive_info_dict: dict
:param scanner_id : ScannerID
:type scanner_id : int or None
:param tarchive_info_dict : Dictionary with information about the DICOM archive queried
from the tarchive table
:param scanner_id : The ScannerID if there is one

:return subject_id_dict: dictionary with subject IDs and visit label or error status
:rtype subject_id_dict: dict
:raises DetermineSubjectException: Exception if the subject IDs cannot be determined from
the configuration file.

:return: Dictionary with subject IDs and visit label.
"""

config_obj = Config(self.db, self.verbose)
dicom_header = config_obj.get_config('lookupCenterNameUsing')
dicom_value = tarchive_info_dict[dicom_header]
subject_name = tarchive_info_dict[dicom_header]

try:
subject_id_dict = self.config_file.get_subject_ids(self.db, dicom_value, scanner_id)
subject_id_dict['PatientName'] = dicom_value
subject_id_dict = self.config_file.get_subject_ids(self.db, subject_name, scanner_id)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering if we should check also that the subject_id_dict from the config file is returning the proper dictionary structure. AKA a key for PSCID, a key for CandID a key for visitLabel, a key of isPhantom and a key for createVisitLabel. Since the scripts relies on that structure, it could be nice to double that the config file follows that structure.

Copy link
Contributor Author

@maximemulder maximemulder Aug 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I AGREE TO THE FULLEST EXTENT. This is actually the next PR that I wanted to discuss. A dictionary is not the right data structure here, because we know exactly what fields we want to be filled. I was thinking of using specific dataclasses that contain exactly the fields we need. This means that current projects need to rewrite their Python configuration but it is a very straightforward change to make and makes this configuration easier and clearer IMO.

EDIT: See #1150.

except AttributeError:
message = 'Config file does not contain a get_subject_ids routine. Upload will exit now.'
return {'error_message': message}

return subject_id_dict

def validate_subject_ids(self, subject_id_dict):
"""
Ensure that the subject PSCID/CandID corresponds to a single candidate in the candidate
table and that the visit label can be found in the Visit_Windows table. If those
conditions are not fulfilled, then a 'CandMismatchError' with the validation error
is added to the subject IDs dictionary (subject_id_dict).

:param subject_id_dict : dictionary with subject IDs and visit label
:type subject_id_dict : dict

:return: True if the subject IDs are valid, False otherwise
:rtype: bool
"""

psc_id = subject_id_dict['PSCID']
cand_id = subject_id_dict['CandID']
visit_label = subject_id_dict['visitLabel']
is_phantom = subject_id_dict['isPhantom']

# no further checking if the subject is phantom
if is_phantom:
return True
raise DetermineSubjectException(
'Config file does not contain a `get_subject_ids` function. Upload will exit now.'
)

# check that the CandID and PSCID are valid
# TODO use candidate_db class for that for bids_import
query = 'SELECT c1.CandID, c2.PSCID AS PSCID ' \
' FROM candidate c1 ' \
' LEFT JOIN candidate c2 ON (c1.CandID=c2.CandID AND c2.PSCID = %s) ' \
' WHERE c1.CandID = %s'
results = self.db.pselect(query=query, args=(psc_id, cand_id))
if not results:
# if no rows were returned, then the CandID is not valid
subject_id_dict['message'] = '=> Could not find candidate with CandID=' + cand_id \
+ ' in the database'
subject_id_dict['CandMismatchError'] = 'CandID does not exist'
return False
elif not results[0]['PSCID']:
# if no PSCID returned in the row, then PSCID and CandID do not match
subject_id_dict['message'] = '=> PSCID and CandID of the image mismatch'
# Message is undefined
subject_id_dict['CandMismatchError'] = subject_id_dict['message']
return False
if subject_id_dict == {}:
raise DetermineSubjectException(
f'Cannot get subject IDs for subject \'{subject_name}\'.\n'
'Possible causes:\n'
'- The subject name is not correctly formatted (should usually be \'PSCID_CandID_VisitLabel\').\n'
'- The function `get_subject_ids` in the Python configuration file is not properly defined.\n'
'- Other project specific reason.'
)

# check if visit label is valid
# TODO use visit_windows class for that for bids_import
query = 'SELECT Visit_label FROM Visit_Windows WHERE BINARY Visit_label = %s'
results = self.db.pselect(query=query, args=(visit_label,))
if results:
subject_id_dict['message'] = f'=> Found visit label {visit_label} in Visit_Windows'
return True
elif subject_id_dict['createVisitLabel']:
subject_id_dict['message'] = f'=> Will create visit label {visit_label} in Visit_Windows'
return True
else:
subject_id_dict['message'] = f'=> Visit Label {visit_label} does not exist in Visit_Windows'
# Message is undefined
subject_id_dict['CandMismatchError'] = subject_id_dict['message']
return False
subject_id_dict['PatientName'] = subject_name
return subject_id_dict

def map_bids_param_to_loris_param(self, file_parameters):
"""
Expand Down Expand Up @@ -1110,7 +1063,7 @@ def get_list_of_files_sorted_by_acq_time(self, files_list):
sorted_files_list = sorted(new_files_list, key=lambda x: x['acq_time'])
except TypeError:
return None

return sorted_files_list

def modify_fmap_json_file_to_write_intended_for(self, sorted_fmap_files_list, s3_obj, tmp_dir):
Expand Down
Loading
Loading