-
Notifications
You must be signed in to change notification settings - Fork 52
Validate subject IDs early in the imaging pipeline #1144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
cd0e205
bf6057d
335ad69
20477bf
361d2cf
ae88d80
298164a
956917c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,8 +63,8 @@ =head2 Methods | |
|
||
|
||
my $profile = ''; | ||
my $upload_id = undef; | ||
my ($debug, $verbose) = (0,1); | ||
my $upload_id = undef; | ||
my ($debug, $verbose) = (0,0); | ||
my $stdout = ''; | ||
my $stderr = ''; | ||
|
||
|
@@ -158,7 +158,7 @@ =head2 Methods | |
|
||
|
||
|
||
my ($stdoutbase, $stderrbase) = ("$data_dir/batch_output/imuploadstdout.log", | ||
my ($stdoutbase, $stderrbase) = ("$data_dir/batch_output/imuploadstdout.log", | ||
"$data_dir/batch_output/imuploadstderr.log"); | ||
|
||
while($_ = $ARGV[0] // '', /^-/) { | ||
|
@@ -201,7 +201,7 @@ =head2 Methods | |
my $phantom = $phantomarray[$counter-1]; | ||
my $patientname = $patientnamearray[$counter-1]; | ||
|
||
## Ensure that | ||
## Ensure that | ||
## 1) the uploaded file is of type .tgz or .tar.gz or .zip | ||
## 2) check that input file provides phantom details (Y for phantom, N for real candidates) | ||
## 3) for non-phantoms, the patient name and path entries are identical; this mimics the imaging uploader in the front-end | ||
|
@@ -229,13 +229,34 @@ =head2 Methods | |
print STDERR "Please leave the patient name blank for phantom " | ||
. "entries\n"; | ||
exit $NeuroDB::ExitCodes::PNAME_FILENAME_MISMATCH; | ||
} | ||
else { | ||
$patientname = 'NULL'; | ||
} | ||
} | ||
else { | ||
$patientname = 'NULL'; | ||
} | ||
} | ||
|
||
## Populate the mri_upload table with necessary entries and get an upload_id | ||
## Check the subject information before inserting anything | ||
|
||
if ($phantom eq 'N') { | ||
my $python_config = $configOB->getPythonConfigFile(); | ||
|
||
my $command = sprintf( | ||
"validate_subject_ids.py --profile %s --subject %s", | ||
$python_config, | ||
$patientname, | ||
); | ||
|
||
if ($verbose) { | ||
$command .= " --verbose"; | ||
} | ||
|
||
if (system($command) != 0) { | ||
# The error is already printed by the Python script, just exit | ||
exit $NeuroDB::ExitCodes::CANDIDATE_MISMATCH; | ||
Comment on lines
+253
to
+255
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This would fail silently. I think the data should still be inserted in the imaging uploader but with a failure status instead. |
||
} | ||
} | ||
|
||
## Populate the mri_upload table with necessary entries and get an upload_id | ||
|
||
$upload_id = insertIntoMRIUpload(\$dbh, | ||
$patientname, | ||
|
@@ -259,8 +280,8 @@ =head2 Methods | |
} | ||
##if qsub is not enabled | ||
else { | ||
print "Running now the following command: $command\n" if $verbose; | ||
system($command); | ||
print "Running now the following command: $command\n" if $verbose; | ||
system($command); | ||
} | ||
|
||
push @submitted, $input; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,23 +35,16 @@ def __init__(self, db, verbose): | |
self.db = db | ||
self.verbose = verbose | ||
|
||
def check_candid_pscid_combination(self, psc_id, cand_id): | ||
def get_candidate_psc_id(self, cand_id: str | int) -> str | None: | ||
""" | ||
Checks whether the PSCID/CandID combination corresponds to a valid candidate in the `candidate` table. | ||
|
||
:param psc_id: PSCID of the candidate | ||
:type psc_id: str | ||
:param cand_id: CandID of the candidate | ||
:type cand_id: int | ||
|
||
:returns: the valid CandID and PSCID if the combination corresponds to a candidate, None otherwise | ||
Return a candidate PSCID and based on its CandID, or `None` if no candidate is found in | ||
the database. | ||
""" | ||
|
||
query = "SELECT c1.CandID, c2.PSCID AS PSCID " \ | ||
" FROM candidate c1 " \ | ||
" LEFT JOIN candidate c2 ON (c1.CandID=c2.CandID AND c2.PSCID = %s) " \ | ||
" WHERE c1.CandID = %s" | ||
query = 'SELECT PSCID ' \ | ||
'FROM candidate ' \ | ||
'WHERE CandID = %s' | ||
Comment on lines
+44
to
+46
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you should revert back to the old SQL. There is a reason why there are those joins. It allows checking the the PSCID and DCCID refers to the same candidate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whoops, I forgot that the PSCID is not unique, my mistake ! EDIT: Actually, even with duplicate PSCIDs, I don't see how the new code could fail. It seems to me that this join is kinda useless :
seems equivalent to me to (omitting
|
||
|
||
results = self.db.pselect(query=query, args=(psc_id, cand_id)) | ||
results = self.db.pselect(query, args=[cand_id]) | ||
|
||
return results if results else None | ||
return results[0]['PSCID'] if results else None |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,7 @@ def __init__(self, db, verbose): | |
self.db = db | ||
self.verbose = verbose | ||
|
||
def check_visit_label_exits(self, visit_label): | ||
def check_visit_label_exits(self, visit_label: str) -> bool: | ||
""" | ||
Returns a list of dictionaries storing the list of Visit_label present in the Visit_Windows table. | ||
|
||
|
@@ -44,6 +44,5 @@ def check_visit_label_exits(self, visit_label): | |
""" | ||
|
||
query = 'SELECT Visit_label FROM Visit_Windows WHERE BINARY Visit_label = %s' | ||
results = self.db.pselect(query=query, args=(visit_label,)) | ||
|
||
return results if results else None | ||
results = self.db.pselect(query=query, args=[visit_label]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that would break since pselect args expects a tuple. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code worked fine (on my machine™) with the lists and I saw a few places that use lists (in |
||
return bool(results) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have you checked that changing the return value type does not break any other code? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep ! The function only has one reference in the codebase (the place I changed). |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,18 +3,19 @@ | |
import shutil | ||
import sys | ||
|
||
from lib.exception.determine_subject_exception import DetermineSubjectException | ||
from lib.exception.validate_subject_exception import ValidateSubjectException | ||
import lib.exitcode | ||
import lib.utilities | ||
|
||
from lib.database_lib.candidate_db import CandidateDB | ||
from lib.database_lib.config import Config | ||
from lib.database_lib.visit_windows import VisitWindows | ||
from lib.database import Database | ||
from lib.dicom_archive import DicomArchive | ||
from lib.imaging import Imaging | ||
from lib.log import Log | ||
from lib.imaging_upload import ImagingUpload | ||
from lib.session import Session | ||
from lib.validate_subject_ids import validate_subject_parts | ||
|
||
|
||
class BasePipeline: | ||
|
@@ -105,10 +106,11 @@ def __init__(self, loris_getopt_obj, script_name): | |
# Grep scanner information based on what is in the DICOM headers | ||
# --------------------------------------------------------------------------------- | ||
if self.dicom_archive_obj.tarchive_info_dict.keys(): | ||
self.subject_id_dict = self.imaging_obj.determine_subject_ids(self.dicom_archive_obj.tarchive_info_dict) | ||
if 'error_message' in self.subject_id_dict: | ||
try: | ||
self.subject_id_dict = self.imaging_obj.determine_subject_ids(self.dicom_archive_obj.tarchive_info_dict) | ||
except DetermineSubjectException as exception: | ||
self.log_error_and_exit( | ||
self.subject_id_dict['error_message'], | ||
exception.message, | ||
Comment on lines
+111
to
+113
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice the exception refactoring! Have you checked that determine_subject_ids is not used anywhere else? To make sure nothing gets broken by this refactoring. Thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep ! I did change the error management in all the places this function is called. There is just one place which had no error management at all, so I did not add any: |
||
lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE, | ||
is_error="Y", | ||
is_verbose="N" | ||
|
@@ -235,52 +237,30 @@ def validate_subject_ids(self): | |
""" | ||
Ensure that the subject PSCID/CandID corresponds to a single candidate in the candidate | ||
table and that the visit label can be found in the Visit_Windows table. If those | ||
conditions are not fulfilled, then a 'CandMismatchError' with the validation error | ||
is added to the subject IDs dictionary (subject_id_dict). | ||
conditions are not fulfilled. | ||
""" | ||
|
||
psc_id = self.subject_id_dict["PSCID"] | ||
cand_id = self.subject_id_dict["CandID"] | ||
visit_label = self.subject_id_dict["visitLabel"] | ||
is_phantom = self.subject_id_dict["isPhantom"] | ||
|
||
# no further checking if the subject is phantom | ||
if is_phantom: | ||
if self.subject_id_dict['isPhantom']: | ||
return | ||
|
||
# check that the CandID and PSCID are valid | ||
candidate_db_obj = CandidateDB(self.db, self.verbose) | ||
results = candidate_db_obj.check_candid_pscid_combination(psc_id, cand_id) | ||
if not results: | ||
# if no rows were returned, then the CandID is not valid | ||
self.subject_id_dict["message"] = f"=> Could not find candidate with CandID={cand_id} in the database" | ||
self.subject_id_dict["CandMismatchError"] = "CandID does not exist" | ||
elif not results[0]["PSCID"]: | ||
# if no PSCID returned in the row, then PSCID and CandID do not match | ||
self.subject_id_dict["message"] = "=> PSCID and CandID of the image mismatch" | ||
self.subject_id_dict["CandMismatchError"] = self.subject_id_dict['message'] | ||
|
||
# check if visit label is valid | ||
visit_windows_obj = VisitWindows(self.db, self.verbose) | ||
results = visit_windows_obj.check_visit_label_exits(visit_label) | ||
if results: | ||
self.subject_id_dict["message"] = f"Found visit label {visit_label} in Visit_Windows" | ||
elif self.subject_id_dict["createVisitLabel"]: | ||
self.subject_id_dict["message"] = f"Will create visit label {visit_label} in Visit_Windows" | ||
else: | ||
self.subject_id_dict["message"] = f"Visit Label {visit_label} does not exist in Visit_Windows" | ||
self.subject_id_dict["CandMismatchError"] = self.subject_id_dict['message'] | ||
try: | ||
validate_subject_parts( | ||
self.db, | ||
self.verbose, | ||
self.subject_id_dict['PSCID'], | ||
self.subject_id_dict['CandID'], | ||
self.subject_id_dict['visitLabel'], | ||
bool(self.subject_id_dict['createVisitLabel']), | ||
) | ||
|
||
if "CandMismatchError" in self.subject_id_dict.keys(): | ||
# if there is a candidate mismatch error, log it but do not exit. It will be logged later in SQL table | ||
self.log_info(self.subject_id_dict["CandMismatchError"], is_error="Y", is_verbose="N") | ||
self.imaging_upload_obj.update_mri_upload( | ||
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('0',) | ||
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('1',) | ||
) | ||
else: | ||
self.log_info(self.subject_id_dict["message"], is_error="N", is_verbose="Y") | ||
except ValidateSubjectException as exception: | ||
self.log_info(exception.message, is_error='Y', is_verbose='N') | ||
self.imaging_upload_obj.update_mri_upload( | ||
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('1',) | ||
upload_id=self.upload_id, fields=('IsCandidateInfoValidated',), values=('0',) | ||
) | ||
|
||
def log_error_and_exit(self, message, exit_code, is_error, is_verbose): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
class DetermineSubjectException(Exception): | ||
""" | ||
Exception raised if some subject IDs cannot be determined using the config file. | ||
""" | ||
message: str | ||
|
||
def __init__(self, message: str): | ||
super().__init__(message) | ||
self.message = message |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
class ValidateSubjectException(Exception): | ||
""" | ||
Exception raised if some subject IDs validation fails. | ||
""" | ||
message: str | ||
|
||
def __init__(self, message: str): | ||
super().__init__(message) | ||
self.message = message |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why has this changed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, maybe I should have proposed this change in a separate PR indeed.
The current line makes the imaging pipeline always verbose, no matter if the
--verbose
argument is provided or not. When calling the Python script from Perl, it becomes very noticeable since all database queries are logged.