@@ -406,23 +406,31 @@ def _check_text(self, key: str, value: Any) -> Any:
406406 if isinstance (value , str ) and any (
407407 (
408408 # Coding.display is clinically unnecessary but is useful for rendering.
409- # Since "code" is always present, downstream consumers can & should provide their own display label.
410- # But we don't remove it entirely, for cases where unexpected codes are used.
411- # Note that this will definitely over-scrub (like scrubbing "White" from the USCDI race extension),
412- # but again -- this display value is redundant and rather than try to be smart, we're safely dumb.
409+ # Since "code" is always present, downstream consumers can & should provide their
410+ # own display label. But we don't remove it entirely, for cases where unexpected
411+ # codes are used. Note that this will definitely over-scrub (like scrubbing "White"
412+ # from the USCDI race extension), but again -- this display value is redundant and
413+ # rather than try to be smart, we're safely dumb.
413414 key == "display" ,
414- # CodeableConcept.text has clinical value for situations that don't have clear coding yet.
415- # Think early-days Covid day PCRs. Which is why we let it through in the first place.
415+ # CodeableConcept.text has clinical value for situations that don't have clear
416+ # coding yet, like early-days Covid day PCRs. And text-only codeable concepts show
417+ # up a lot when the EHR allows it. Hence why we normally let it through.
416418 # But we should still scrub it since it is loose text that could hold PHI.
417419 key == "text" ,
418- # Observation.valueString has clinical value, but could hold PHI.
419- # Similarly, extensions might have valueString members (though the only supported ones don't have
420- # interesting fields there -- race & ethnicity allow for freeform text descriptions).
421- key == "valueString" ,
422420 )
423421 ):
424422 value = self .scrub_text (value )
425423
424+ if isinstance (value , str ) and any (
425+ (
426+ # Observation.valueString has some clinical value, but often holds PHI.
427+ # If we ever want to use this for clinical purposes, we should process it via NLP
428+ # on the ETL side (like we do for DocumentReference attachments).
429+ key == "valueString" ,
430+ )
431+ ):
432+ raise MaskValue
433+
426434 return value
427435
428436 @staticmethod
0 commit comments