-
Notifications
You must be signed in to change notification settings - Fork 52
Improve Python subject determination and validation error handling #1146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,23 +35,16 @@ def __init__(self, db, verbose): | |
self.db = db | ||
self.verbose = verbose | ||
|
||
def check_candid_pscid_combination(self, psc_id, cand_id): | ||
def get_candidate_psc_id(self, cand_id: str | int) -> str | None: | ||
""" | ||
Checks whether the PSCID/CandID combination corresponds to a valid candidate in the `candidate` table. | ||
|
||
:param psc_id: PSCID of the candidate | ||
:type psc_id: str | ||
:param cand_id: CandID of the candidate | ||
:type cand_id: int | ||
|
||
:returns: the valid CandID and PSCID if the combination corresponds to a candidate, None otherwise | ||
Return a candidate PSCID and based on its CandID, or `None` if no candidate is found in | ||
the database. | ||
""" | ||
|
||
query = "SELECT c1.CandID, c2.PSCID AS PSCID " \ | ||
" FROM candidate c1 " \ | ||
" LEFT JOIN candidate c2 ON (c1.CandID=c2.CandID AND c2.PSCID = %s) " \ | ||
" WHERE c1.CandID = %s" | ||
query = 'SELECT PSCID ' \ | ||
'FROM candidate ' \ | ||
'WHERE CandID = %s' | ||
|
||
results = self.db.pselect(query=query, args=(psc_id, cand_id)) | ||
results = self.db.pselect(query, args=(cand_id,)) | ||
|
||
return results if results else None | ||
return results[0]['PSCID'] if results else None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was a comment in the old PR, but I still don't see how a join is necessary here. If I query the candidate by CandID, and that the PSCID of the candidate matches the PSCID of the file name, it is a sufficient proof that everying is fine to me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fair enough. That select statement is derived from the perl code actually. @nicolasbrossard any reason why that query is used instead of just selecting the PSCID based on a CandID (and later on checking that the PSCID in the filename is the same as the PSCID in the database). I have a feeling there was a reason for it but my memory fails me. Hopefully, yours is in a better shape ;) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
class DetermineSubjectException(Exception): | ||
""" | ||
Exception raised if some subject IDs cannot be determined using the config file. | ||
""" | ||
|
||
message: str | ||
|
||
def __init__(self, message: str): | ||
super().__init__(message) | ||
self.message = message |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
class ValidateSubjectException(Exception): | ||
""" | ||
Exception raised if some subject IDs validation fails. | ||
""" | ||
|
||
message: str | ||
|
||
def __init__(self, message: str): | ||
super().__init__(message) | ||
self.message = message |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
import os | ||
import datetime | ||
import json | ||
from typing import Any, Optional | ||
import lib.utilities as utilities | ||
import nibabel as nib | ||
import re | ||
|
@@ -21,6 +22,7 @@ | |
from lib.database_lib.mri_violations_log import MriViolationsLog | ||
from lib.database_lib.parameter_file import ParameterFile | ||
from lib.database_lib.parameter_type import ParameterType | ||
from lib.exception.determine_subject_exception import DetermineSubjectException | ||
|
||
__license__ = "GPLv3" | ||
|
||
|
@@ -510,93 +512,44 @@ def grep_cand_id_from_file_id(self, file_id): | |
# return the result | ||
return results[0]['CandID'] if results else None | ||
|
||
def determine_subject_ids(self, tarchive_info_dict, scanner_id=None): | ||
def determine_subject_ids(self, tarchive_info_dict, scanner_id: Optional[int] = None) -> dict[str, Any]: | ||
""" | ||
Determine subject IDs based on the DICOM header specified by the lookupCenterNameUsing | ||
config setting. This function will call a function in the config file that can be | ||
config setting. This function will call a function in the configuration file that can be | ||
customized for each project. | ||
|
||
:param tarchive_info_dict: dictionary with information about the DICOM archive queried | ||
from the tarchive table | ||
:type tarchive_info_dict: dict | ||
:param scanner_id : ScannerID | ||
:type scanner_id : int or None | ||
:param tarchive_info_dict : Dictionary with information about the DICOM archive queried | ||
from the tarchive table | ||
:param scanner_id : The ScannerID if there is one | ||
|
||
:return subject_id_dict: dictionary with subject IDs and visit label or error status | ||
:rtype subject_id_dict: dict | ||
:raises DetermineSubjectException: Exception if the subject IDs cannot be determined from | ||
the configuration file. | ||
|
||
:return: Dictionary with subject IDs and visit label. | ||
""" | ||
|
||
config_obj = Config(self.db, self.verbose) | ||
dicom_header = config_obj.get_config('lookupCenterNameUsing') | ||
dicom_value = tarchive_info_dict[dicom_header] | ||
subject_name = tarchive_info_dict[dicom_header] | ||
|
||
try: | ||
subject_id_dict = self.config_file.get_subject_ids(self.db, dicom_value, scanner_id) | ||
subject_id_dict['PatientName'] = dicom_value | ||
subject_id_dict = self.config_file.get_subject_ids(self.db, subject_name, scanner_id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am wondering if we should check also that the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I AGREE TO THE FULLEST EXTENT. This is actually the next PR that I wanted to discuss. A dictionary is not the right data structure here, because we know exactly what fields we want to be filled. I was thinking of using specific dataclasses that contain exactly the fields we need. This means that current projects need to rewrite their Python configuration but it is a very straightforward change to make and makes this configuration easier and clearer IMO. EDIT: See #1150. |
||
except AttributeError: | ||
message = 'Config file does not contain a get_subject_ids routine. Upload will exit now.' | ||
return {'error_message': message} | ||
|
||
return subject_id_dict | ||
|
||
def validate_subject_ids(self, subject_id_dict): | ||
""" | ||
Ensure that the subject PSCID/CandID corresponds to a single candidate in the candidate | ||
table and that the visit label can be found in the Visit_Windows table. If those | ||
conditions are not fulfilled, then a 'CandMismatchError' with the validation error | ||
is added to the subject IDs dictionary (subject_id_dict). | ||
|
||
:param subject_id_dict : dictionary with subject IDs and visit label | ||
:type subject_id_dict : dict | ||
|
||
:return: True if the subject IDs are valid, False otherwise | ||
:rtype: bool | ||
""" | ||
|
||
psc_id = subject_id_dict['PSCID'] | ||
cand_id = subject_id_dict['CandID'] | ||
visit_label = subject_id_dict['visitLabel'] | ||
is_phantom = subject_id_dict['isPhantom'] | ||
|
||
# no further checking if the subject is phantom | ||
if is_phantom: | ||
return True | ||
raise DetermineSubjectException( | ||
'Config file does not contain a `get_subject_ids` function. Upload will exit now.' | ||
) | ||
|
||
# check that the CandID and PSCID are valid | ||
# TODO use candidate_db class for that for bids_import | ||
query = 'SELECT c1.CandID, c2.PSCID AS PSCID ' \ | ||
' FROM candidate c1 ' \ | ||
' LEFT JOIN candidate c2 ON (c1.CandID=c2.CandID AND c2.PSCID = %s) ' \ | ||
' WHERE c1.CandID = %s' | ||
results = self.db.pselect(query=query, args=(psc_id, cand_id)) | ||
if not results: | ||
# if no rows were returned, then the CandID is not valid | ||
subject_id_dict['message'] = '=> Could not find candidate with CandID=' + cand_id \ | ||
+ ' in the database' | ||
subject_id_dict['CandMismatchError'] = 'CandID does not exist' | ||
return False | ||
elif not results[0]['PSCID']: | ||
# if no PSCID returned in the row, then PSCID and CandID do not match | ||
subject_id_dict['message'] = '=> PSCID and CandID of the image mismatch' | ||
# Message is undefined | ||
subject_id_dict['CandMismatchError'] = subject_id_dict['message'] | ||
return False | ||
if subject_id_dict == {}: | ||
raise DetermineSubjectException( | ||
f'Cannot get subject IDs for subject \'{subject_name}\'.\n' | ||
'Possible causes:\n' | ||
'- The subject name is not correctly formatted (should usually be \'PSCID_CandID_VisitLabel\').\n' | ||
'- The function `get_subject_ids` in the Python configuration file is not properly defined.\n' | ||
'- Other project specific reason.' | ||
) | ||
|
||
# check if visit label is valid | ||
# TODO use visit_windows class for that for bids_import | ||
query = 'SELECT Visit_label FROM Visit_Windows WHERE BINARY Visit_label = %s' | ||
results = self.db.pselect(query=query, args=(visit_label,)) | ||
if results: | ||
subject_id_dict['message'] = f'=> Found visit label {visit_label} in Visit_Windows' | ||
return True | ||
elif subject_id_dict['createVisitLabel']: | ||
subject_id_dict['message'] = f'=> Will create visit label {visit_label} in Visit_Windows' | ||
return True | ||
else: | ||
subject_id_dict['message'] = f'=> Visit Label {visit_label} does not exist in Visit_Windows' | ||
# Message is undefined | ||
subject_id_dict['CandMismatchError'] = subject_id_dict['message'] | ||
return False | ||
subject_id_dict['PatientName'] = subject_name | ||
return subject_id_dict | ||
|
||
def map_bids_param_to_loris_param(self, file_parameters): | ||
""" | ||
|
@@ -1110,7 +1063,7 @@ def get_list_of_files_sorted_by_acq_time(self, files_list): | |
sorted_files_list = sorted(new_files_list, key=lambda x: x['acq_time']) | ||
except TypeError: | ||
return None | ||
|
||
return sorted_files_list | ||
|
||
def modify_fmap_json_file_to_write_intended_for(self, sorted_fmap_files_list, s3_obj, tmp_dir): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That could probably be just in one line given how short the query is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fine by me, done.