2727class  BaseNlpTask (tasks .EtlTask ):
2828    """Base class for any clinical-notes-based NLP task.""" 
2929
30-     resource : ClassVar  =  " DocumentReference"
30+     resource : ClassVar  =  { "DiagnosticReport" ,  " DocumentReference"} 
3131    needs_bulk_deid : ClassVar  =  False 
3232
3333    # You may want to override these in your subclass 
@@ -80,44 +80,45 @@ async def read_notes(
8080        """ 
8181        Iterate through clinical notes. 
8282
83-         :returns: a tuple of original-docref , scrubbed-docref , and clinical  note 
83+         :returns: a tuple of original-resource , scrubbed-resource , and note text  
8484        """ 
8585        warned_connection_error  =  False 
8686
87-         note_filter  =  self .task_config .resource_filter  or  nlp .is_docref_valid 
87+         note_filter  =  self .task_config .resource_filter  or  nlp .is_note_valid 
8888
89-         for  docref  in  self .read_ndjson (progress = progress ):
90-             orig_docref  =  copy .deepcopy (docref )
89+         for  note  in  self .read_ndjson (progress = progress ):
90+             orig_note  =  copy .deepcopy (note )
9191            can_process  =  (
92-                 note_filter (self .scrubber .codebook , docref )
93-                 and  (doc_check  is  None  or  doc_check (docref ))
94-                 and  self .scrubber .scrub_resource (docref , scrub_attachments = False , keep_stats = False )
92+                 note_filter (self .scrubber .codebook , note )
93+                 and  (doc_check  is  None  or  doc_check (note ))
94+                 and  self .scrubber .scrub_resource (note , scrub_attachments = False , keep_stats = False )
9595            )
9696            if  not  can_process :
9797                continue 
9898
9999            try :
100-                 clinical_note  =  await  fhir .get_clinical_note (self .task_config .client , docref )
100+                 note_text  =  await  fhir .get_clinical_note (self .task_config .client , note )
101101            except  cfs .BadAuthArguments  as  exc :
102102                if  not  warned_connection_error :
103103                    # Only warn user about a misconfiguration once per task. 
104104                    # It's not fatal because it might be intentional (partially inlined DocRefs 
105105                    # and the other DocRefs are known failures - BCH hits this with Cerner data). 
106106                    print (exc , file = sys .stderr )
107107                    warned_connection_error  =  True 
108-                 self .add_error (orig_docref )
108+                 self .add_error (orig_note )
109109                continue 
110110            except  Exception  as  exc :
111-                 logging .warning ("Error getting text for docref %s: %s" , docref ["id" ], exc )
112-                 self .add_error (orig_docref )
111+                 orig_note_ref  =  f"{ orig_note ['resourceType' ]}  /{ orig_note ['id' ]}  " 
112+                 logging .warning ("Error getting text for note %s: %s" , orig_note_ref , exc )
113+                 self .add_error (orig_note )
113114                continue 
114115
115-             yield  orig_docref ,  docref ,  clinical_note 
116+             yield  orig_note ,  note ,  note_text 
116117
117118    @staticmethod  
118-     def  remove_trailing_whitespace (note : str ) ->  str :
119+     def  remove_trailing_whitespace (note_text : str ) ->  str :
119120        """Sometimes NLP can be mildly confused by trailing whitespace, so this removes it""" 
120-         return  TRAILING_WHITESPACE .sub ("" , note )
121+         return  TRAILING_WHITESPACE .sub ("" , note_text )
121122
122123
123124class  BaseOpenAiTask (BaseNlpTask ):
@@ -139,59 +140,52 @@ async def init_check(cls) -> None:
139140    async  def  read_entries (self , * , progress : rich .progress .Progress  =  None ) ->  tasks .EntryIterator :
140141        client  =  self .client_class ()
141142
142-         async  for  orig_docref ,  docref ,  orig_clinical_note  in  self .read_notes (progress = progress ):
143+         async  for  orig_note ,  note ,  orig_note_text  in  self .read_notes (progress = progress ):
143144            try :
144-                 docref_id , encounter_id , subject_id  =  nlp .get_docref_info ( docref )
145+                 note_ref , encounter_id , subject_id  =  nlp .get_note_info ( note )
145146            except  KeyError  as  exc :
146147                logging .warning (exc )
147-                 self .add_error (orig_docref )
148+                 self .add_error (orig_note )
148149                continue 
149150
150-             clinical_note  =  self .remove_trailing_whitespace (orig_clinical_note )
151+             note_text  =  self .remove_trailing_whitespace (orig_note_text )
152+             orig_note_ref  =  f"{ orig_note ['resourceType' ]}  /{ orig_note ['id' ]}  " 
151153
152154            try :
153155                completion_class  =  chat .ParsedChatCompletion [self .response_format ]
154156                response  =  await  nlp .cache_wrapper (
155157                    self .task_config .dir_phi ,
156158                    f"{ self .name }  _v{ self .task_version }  " ,
157-                     clinical_note ,
159+                     note_text ,
158160                    lambda  x : completion_class .model_validate_json (x ),  # from file 
159161                    lambda  x : x .model_dump_json (  # to file 
160162                        indent = None , round_trip = True , exclude_unset = True , by_alias = True 
161163                    ),
162164                    client .prompt ,
163165                    self .system_prompt ,
164-                     self .get_user_prompt (clinical_note ),
166+                     self .get_user_prompt (note_text ),
165167                    self .response_format ,
166168                )
167-             except  openai .APIError  as  exc :
168-                 logging .warning (
169-                     f"Could not connect to NLP server for DocRef { orig_docref ['id' ]}  : { exc }  " 
170-                 )
171-                 self .add_error (orig_docref )
172-                 continue 
173-             except  pydantic .ValidationError  as  exc :
174-                 logging .warning (
175-                     f"Could not process answer from NLP server for DocRef { orig_docref ['id' ]}  : { exc }  " 
176-                 )
177-                 self .add_error (orig_docref )
169+             except  Exception  as  exc :
170+                 logging .warning (f"NLP failed for { orig_note_ref }  : { exc }  " )
171+                 self .add_error (orig_note )
178172                continue 
179173
180174            choice  =  response .choices [0 ]
181175
182176            if  choice .finish_reason  !=  "stop"  or  not  choice .message .parsed :
183177                logging .warning (
184-                     f"NLP server response didn't complete for DocRef  { orig_docref [ 'id' ] }  : " 
178+                     f"NLP server response didn't complete for { orig_note_ref }  : " 
185179                    f"{ choice .finish_reason }  " 
186180                )
187-                 self .add_error (orig_docref )
181+                 self .add_error (orig_note )
188182                continue 
189183
190184            parsed  =  choice .message .parsed .model_dump (mode = "json" )
191-             self .post_process (parsed , orig_clinical_note ,  orig_docref )
185+             self .post_process (parsed , orig_note_text ,  orig_note )
192186
193187            yield  {
194-                 "note_ref" : f"DocumentReference/ { docref_id } "  ,
188+                 "note_ref" : note_ref ,
195189                "encounter_ref" : f"Encounter/{ encounter_id }  " ,
196190                "subject_ref" : f"Patient/{ subject_id }  " ,
197191                # Since this date is stored as a string, use UTC time for easy comparisons 
@@ -202,11 +196,11 @@ async def read_entries(self, *, progress: rich.progress.Progress = None) -> task
202196            }
203197
204198    @classmethod  
205-     def  get_user_prompt (cls , clinical_note : str ) ->  str :
199+     def  get_user_prompt (cls , note_text : str ) ->  str :
206200        prompt  =  cls .user_prompt  or  "%CLINICAL-NOTE%" 
207-         return  prompt .replace ("%CLINICAL-NOTE%" , clinical_note )
201+         return  prompt .replace ("%CLINICAL-NOTE%" , note_text )
208202
209-     def  post_process (self , parsed : dict , orig_clinical_note : str , orig_docref : dict ) ->  None :
203+     def  post_process (self , parsed : dict , orig_note_text : str , orig_note : dict ) ->  None :
210204        """Subclasses can fill this out if they like""" 
211205
212206    @classmethod  
@@ -261,7 +255,7 @@ class BaseOpenAiTaskWithSpans(BaseOpenAiTask):
261255    It assumes the field is named "spans" in the top level of the pydantic model. 
262256    """ 
263257
264-     def  post_process (self , parsed : dict , orig_clinical_note : str , orig_docref : dict ) ->  None :
258+     def  post_process (self , parsed : dict , orig_note_text : str , orig_note : dict ) ->  None :
265259        new_spans  =  []
266260        missed_some  =  False 
267261
@@ -278,18 +272,18 @@ def post_process(self, parsed: dict, orig_clinical_note: str, orig_docref: dict)
278272            span  =  ESCAPED_WHITESPACE .sub (r"\\s+" , span )
279273
280274            found  =  False 
281-             for  match  in  re .finditer (span , orig_clinical_note , re .IGNORECASE ):
275+             for  match  in  re .finditer (span , orig_note_text , re .IGNORECASE ):
282276                found  =  True 
283277                new_spans .append (match .span ())
284278            if  not  found :
285279                missed_some  =  True 
286280                logging .warning (
287281                    "Could not match span received from NLP server for " 
288-                     f"DocRef  { orig_docref ['id' ]}  : { orig_span }  " 
282+                     f"{ orig_note [ 'resourceType' ] } / { orig_note ['id' ]}  : { orig_span }  " 
289283                )
290284
291285        if  missed_some :
292-             self .add_error (orig_docref )
286+             self .add_error (orig_note )
293287
294288        parsed ["spans" ] =  new_spans 
295289
0 commit comments