first version of including NER confusion matrix

jtsodapop · jtsodapop · commit d406e0610d70 · 2022-03-07T16:26:34.000-05:00
diff --git a/labelbox/data/metrics/confusion_matrix/calculation.py b/labelbox/data/metrics/confusion_matrix/calculation.py
@@ -2,9 +2,9 @@
 
 import numpy as np
 
-from ..iou.calculation import _get_mask_pairs, _get_vector_pairs, miou
+from ..iou.calculation import _get_mask_pairs, _get_vector_pairs, _get_ner_pairs, miou
 from ...annotation_types import (ObjectAnnotation, ClassificationAnnotation,
-                                 Mask, Geometry, Checklist, Radio,
+                                 Mask, Geometry, Checklist, Radio, TextEntity,
                                  ScalarMetricValue, ConfusionMatrixMetricValue)
 from ..group import (get_feature_pairs, get_identifying_key, has_no_annotations,
                      has_no_matching_annotations)
@@ -68,6 +68,8 @@ def feature_confusion_matrix(
     elif isinstance(predictions[0].value, Geometry):
         return vector_confusion_matrix(ground_truths, predictions,
                                        include_subclasses, iou)
+    elif isinstance(predictions[0].value, TextEntity):
+        pass  #TODO
     elif isinstance(predictions[0], ClassificationAnnotation):
         return classification_confusion_matrix(ground_truths, predictions)
     else:
@@ -288,3 +290,25 @@ def mask_confusion_matrix(ground_truths: List[ObjectAnnotation],
     fn_mask = (prediction_np == 0) & (ground_truth_np == 1)
     tn_mask = prediction_np == ground_truth_np == 0
     return [np.sum(tp_mask), np.sum(fp_mask), np.sum(fn_mask), np.sum(tn_mask)]
+
+
+def ner_confusion_matrix(ground_truths: List[ObjectAnnotation],
+                         predictions: List[ObjectAnnotation],
+                         include_subclasses: bool,
+                         iou: float) -> Optional[ConfusionMatrixMetricValue]:
+    """Computes confusion matrix metric between two lists of TextEntity objects
+
+    Args:
+        ground_truths: List of ground truth text entity annotations
+        predictions: List of prediction text entity annotations
+        include_subclasses: forwarded to the pair matcher (logic still TODO)
+        iou: forwarded to object_pair_confusion_matrix together with the pairs
+    Returns:
+        confusion matrix as a list: [TP,FP,TN,FN], or None if no annotations
+    """
+    if has_no_matching_annotations(ground_truths, predictions):
+        return [0, int(len(predictions) > 0), 0, int(len(ground_truths) > 0)]
+    elif has_no_annotations(ground_truths, predictions):
+        return None
+    pairs = _get_ner_pairs(ground_truths, predictions)
+    return object_pair_confusion_matrix(pairs, include_subclasses, iou)
diff --git a/labelbox/data/metrics/iou/calculation.py b/labelbox/data/metrics/iou/calculation.py
@@ -4,6 +4,8 @@
 from shapely.geometry import Polygon
 import numpy as np
 
+from labelbox.data.annotation_types.ner import TextEntity
+
 from ..group import get_feature_pairs, get_identifying_key, has_no_annotations, has_no_matching_annotations
 from ...annotation_types import (ObjectAnnotation, ClassificationAnnotation,
                                  Mask, Geometry, Point, Line, Checklist, Text,
@@ -269,3 +271,25 @@ def _ensure_valid_poly(poly):
 def _mask_iou(mask1: np.ndarray, mask2: np.ndarray) -> ScalarMetricValue:
     """Computes iou between two binary segmentation masks."""
     return np.sum(mask1 & mask2) / np.sum(mask1 | mask2)
+
+
+def _get_ner_pairs(
+    ground_truths: List[ObjectAnnotation], predictions: List[ObjectAnnotation]
+) -> List[Tuple[ObjectAnnotation, ObjectAnnotation, ScalarMetricValue]]:
+    """Score every ground truth / prediction combination with `_ner_iou`.
+
+    Returns a list of `(ground_truth, prediction, iou)` tuples.
+    """
+    return [
+        (truth, guess, _ner_iou(truth.value, guess.value))
+        for truth, guess in product(ground_truths, predictions)
+    ]
+
+
+def _ner_iou(ner1: TextEntity, ner2: TextEntity):
+    """Computes iou between two text entity spans (0 when they are disjoint)."""
+    overlap = min(ner1.end, ner2.end) - max(ner1.start, ner2.start)
+    extent = max(ner1.end, ner2.end) - min(ner1.start, ner2.start)
+    if extent == 0:  # zero-length hull: same one-offset span if start <= end
+        return 1.0
+    return max(overlap, 0) / extent  # clamp so disjoint spans are not negative