Merge pull request #14 from wojiaodawei/feature/add-gliner-predict-params-flatner-threshold-multilabel

MVYaroshenko · web-flow · commit 049f5952f7f8 · 2025-04-11T11:20:33.000+03:00
Now handles the following GLiNER predict parameters: flat_ner, threshold, and multi_label
diff --git a/src/utca/implementation/predictors/gliner_predictor/predictor.py b/src/utca/implementation/predictors/gliner_predictor/predictor.py
@@ -70,9 +70,12 @@ def invoke(self, input_data: GLiNERPredictorInput, evaluator: Evaluator) -> Dict
         """
         if not input_data.labels:
             return {"output": [[]]*len(input_data.texts)}
-        labels = set(input_data.labels)
         texts = input_data.texts
-        outputs = self.model.batch_predict_entities(texts=texts, labels=labels) # type: ignore
+        labels = set(input_data.labels)
+        flat_ner = input_data.flat_ner
+        threshold = input_data.threshold
+        multi_label = input_data.multi_label
+        outputs = self.model.batch_predict_entities(texts=texts, labels=labels, flat_ner=flat_ner, threshold=threshold, multi_label=multi_label) # type: ignore
         return ensure_dict(outputs)
 
 
diff --git a/src/utca/implementation/predictors/gliner_predictor/schema.py b/src/utca/implementation/predictors/gliner_predictor/schema.py
@@ -35,8 +35,9 @@ class GLiNERPredictorInput(IOModel):
     """
     texts: List[str]
     labels: List[str]
+    flat_ner: bool = True
     threshold: float = 0.5
-
+    multi_label: bool = False
 
 
 class GLiNERPredictorOutput(IOModel):
diff --git a/src/utca/implementation/tasks/text_processing/ner/gliner_task/actions.py b/src/utca/implementation/tasks/text_processing/ner/gliner_task/actions.py
@@ -20,27 +20,39 @@ class GLiNERPreprocessor(Action[Dict[str, Any], Dict[str, Any]]):
 
             "chunks_starts" (List[int]): Chunks start positions. Used by postprocessor;
                 
+            "flat_ner" (bool): Whether to use flat NER;
+
             "threshold" (float): Minimal score for an entity to put into output;
+        
+            "multi_label" (bool): Whether to allow multiple labels per entity;
     """
 
     def __init__(
         self, 
         sents_batch: int=10,
+        flat_ner: bool=True,
         threshold: float=0.5,
+        multi_label: bool=False,
         name: Optional[str]=None,
     ) -> None:
         """
         Args:
             sents_batch (int): Chunks size in sentences. Defaults to 10.
 
-            threshold (float): Minimial score to put entities into the output.
+            flat_ner (bool): Whether to use flat NER. Defaults to True.
+
+            threshold (float): Minimal score to put entities into the output. Defaults to 0.5.
+            
+            multi_label (bool): Whether to allow multiple labels per entity. Defaults to False.
 
             name (Optional[str], optional): Name for identification. If equals to None,
                 class name will be used. Defaults to None.
         """
         super().__init__(name)
-        self.threshold = threshold
         self.sents_batch = sents_batch
+        self.flat_ner = flat_ner
+        self.threshold = threshold
+        self.multi_label = multi_label
 
     
     def get_last_sentence_id(self, i: int, sentences_len: int) -> int:
@@ -78,15 +90,21 @@ def execute(
 
                 "chunks_starts" (List[int]): Chunks start positions. Used by postprocessor;
                 
+                "flat_ner" (bool): Whether to use flat NER;
+
                 "threshold" (float): Minimal score for an entity to put into output;
+            
+                "multi_label" (bool): Whether to allow multiple labels per entity;
         """
         chunks, chunks_starts = (
             self.chunkanize(input_data["text"])
         )
         return {
             "texts": chunks,
             "chunks_starts": chunks_starts,
+            "flat_ner": self.flat_ner,
             "threshold": self.threshold,
+            "multi_label": self.multi_label,
         }