Skip to content

Commit 24be40b

Browse files
authored
Merge pull request #365 from smart-on-fhir/mikix/diagreport-attachments
feat(deid): keep some fields from DiagnosticReport.presentedForm
2 parents 3e6a17e + a850050 commit 24be40b

File tree

6 files changed

+53
-10
lines changed

6 files changed

+53
-10
lines changed

cumulus_etl/deid/ms-config.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -172,7 +172,7 @@
172172
{"path": "DiagnosticReport.media.link", "method": "keep"},
173173
// Skip DiagnosticReport.conclusion
174174
{"path": "DiagnosticReport.conclusionCode", "method": "keep"},
175-
// Skip DiagnosticReport.presentedForm (can add back later when/if we want to run NLP on it)
175+
{"path": "DiagnosticReport.presentedForm", "method": "keep"}, // will be dropped later after running NLP on it
176176

177177
// ** DocumentReference: https://www.hl7.org/fhir/R4/documentreference.html **
178178
// Skip DocumentReference.masterIdentifier

cumulus_etl/deid/scrubber.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -436,12 +436,14 @@ def _check_text(self, key: str, value: Any) -> Any:
436436
@staticmethod
437437
def _check_attachments(resource_type: str, node_path: str, key: str, value: Any) -> Any:
438438
"""Strip any attachment data"""
439-
if (
440-
resource_type == "DocumentReference"
441-
and node_path == "root.content.attachment"
442-
and key in {"data", "url"}
439+
if any(
440+
(
441+
(resource_type == "DiagnosticReport" and node_path == "root.presentedForm"),
442+
(resource_type == "DocumentReference" and node_path == "root.content.attachment"),
443+
)
443444
):
444-
raise MaskValue
445+
if key in {"data", "url"}:
446+
raise MaskValue
445447

446448
return value
447449

cumulus_etl/fhir/fhir_client.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -283,8 +283,9 @@ def create_fhir_client_for_cli(
283283
raise SystemExit(errors.ARGS_INVALID) from exc
284284

285285
client_resources = set(resources)
286-
if "DocumentReference" in client_resources:
287-
# A DocumentReference scope implies a Binary scope as well, since we'll usually need to download attachments
286+
if {"DiagnosticReport", "DocumentReference"} & client_resources:
287+
# Resources with attachments imply a Binary scope as well,
288+
# since we'll usually need to download the referenced content.
288289
client_resources.add("Binary")
289290

290291
return FhirClient(

tests/data/mstool/input/DiagnosticReport.ndjson

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,5 +20,5 @@
2020
}],
2121
"conclusion" : "dropped",
2222
"conclusionCode" : [{ "text": "kept" }],
23-
"presentedForm" : [{ "title": "dropped" }]
23+
"presentedForm" : [{ "data": "xxx", "title": "dropped" }]
2424
}

tests/data/mstool/output/DiagnosticReport.ndjson

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,5 +23,6 @@
2323
"media" : [{
2424
"link" : { "reference": "Media/x" }
2525
}],
26-
"conclusionCode" : [{ "text": "kept" }]
26+
"conclusionCode" : [{ "text": "kept" }],
27+
"presentedForm" : [{ "data": "xxx" }]
2728
}

tests/deid/test_deid_scrubber.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,45 @@ def test_condition(self):
6666
f"Encounter/{scrubber.codebook.fake_id('Encounter', '67890')}",
6767
)
6868

69+
def test_diagnosticreport(self):
70+
"""Verify a basic DiagnosticReport has attachments stripped"""
71+
report = {
72+
"resourceType": "DiagnosticReport",
73+
"id": "dr1",
74+
"presentedForm": [
75+
{
76+
"data": "blarg",
77+
"language": "en",
78+
"size": 5,
79+
},
80+
{
81+
"url": "https://example.com/",
82+
"contentType": "text/plain",
83+
},
84+
],
85+
}
86+
87+
scrubber = Scrubber()
88+
self.assertTrue(scrubber.scrub_resource(report))
89+
self.assertEqual(
90+
report,
91+
{
92+
"resourceType": "DiagnosticReport",
93+
"id": scrubber.codebook.fake_id("DiagnosticReport", "dr1"),
94+
"presentedForm": [
95+
{
96+
"_data": MASKED_EXTENSION,
97+
"language": "en",
98+
"size": 5,
99+
},
100+
{
101+
"_url": MASKED_EXTENSION,
102+
"contentType": "text/plain",
103+
},
104+
],
105+
},
106+
)
107+
69108
def test_documentreference(self):
70109
"""Test DocumentReference, which is interesting because of its list of encounters and attachments"""
71110
docref = {

0 commit comments

Comments
 (0)