Skip to content

Commit 526fb82

Browse files
authored
Merge pull request #486 from Labelbox/al-1723
[AL-1723] NER Confusion Matrix
2 parents 6b5d9e5 + 885e35b commit 526fb82

File tree

7 files changed

+218
-44
lines changed

7 files changed

+218
-44
lines changed

labelbox/data/metrics/confusion_matrix/calculation.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import numpy as np
44

5-
from ..iou.calculation import _get_mask_pairs, _get_vector_pairs, miou
5+
from ..iou.calculation import _get_mask_pairs, _get_vector_pairs, _get_ner_pairs, miou
66
from ...annotation_types import (ObjectAnnotation, ClassificationAnnotation,
7-
Mask, Geometry, Checklist, Radio,
7+
Mask, Geometry, Checklist, Radio, TextEntity,
88
ScalarMetricValue, ConfusionMatrixMetricValue)
99
from ..group import (get_feature_pairs, get_identifying_key, has_no_annotations,
1010
has_no_matching_annotations)
@@ -68,6 +68,9 @@ def feature_confusion_matrix(
6868
elif isinstance(predictions[0].value, Geometry):
6969
return vector_confusion_matrix(ground_truths, predictions,
7070
include_subclasses, iou)
71+
elif isinstance(predictions[0].value, TextEntity):
72+
return ner_confusion_matrix(ground_truths, predictions,
73+
include_subclasses, iou)
7174
elif isinstance(predictions[0], ClassificationAnnotation):
7275
return classification_confusion_matrix(ground_truths, predictions)
7376
else:
@@ -288,3 +291,23 @@ def mask_confusion_matrix(ground_truths: List[ObjectAnnotation],
288291
fn_mask = (prediction_np == 0) & (ground_truth_np == 1)
289292
tn_mask = prediction_np == ground_truth_np == 0
290293
return [np.sum(tp_mask), np.sum(fp_mask), np.sum(fn_mask), np.sum(tn_mask)]
294+
295+
296+
def ner_confusion_matrix(ground_truths: List[ObjectAnnotation],
                         predictions: List[ObjectAnnotation],
                         include_subclasses: bool,
                         iou: float) -> Optional[ConfusionMatrixMetricValue]:
    """Computes confusion matrix metric between two lists of TextEntity objects

    Args:
        ground_truths: List of ground truth text entity annotations
        predictions: List of prediction text entity annotations
        include_subclasses: Whether pairs must also agree on subclassifications
        iou: minimum iou for a pair to count as a true positive
    Returns:
        confusion matrix as a list: [TP,FP,TN,FN].
        None if neither list contains any annotations.
    """
    if has_no_matching_annotations(ground_truths, predictions):
        # One side is empty: every prediction is a FP, every ground truth a FN.
        return [0, int(len(predictions) > 0), 0, int(len(ground_truths) > 0)]
    elif has_no_annotations(ground_truths, predictions):
        return None
    pairs = _get_ner_pairs(ground_truths, predictions)
    return object_pair_confusion_matrix(pairs, include_subclasses, iou)

labelbox/data/metrics/iou/calculation.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from ..group import get_feature_pairs, get_identifying_key, has_no_annotations, has_no_matching_annotations
88
from ...annotation_types import (ObjectAnnotation, ClassificationAnnotation,
99
Mask, Geometry, Point, Line, Checklist, Text,
10-
Radio, ScalarMetricValue)
10+
TextEntity, Radio, ScalarMetricValue)
1111

1212

1313
def miou(ground_truths: List[Union[ObjectAnnotation, ClassificationAnnotation]],
@@ -61,6 +61,8 @@ def feature_miou(ground_truths: List[Union[ObjectAnnotation,
6161
return vector_miou(ground_truths, predictions, include_subclasses)
6262
elif isinstance(predictions[0], ClassificationAnnotation):
6363
return classification_miou(ground_truths, predictions)
64+
elif isinstance(predictions[0].value, TextEntity):
65+
return ner_miou(ground_truths, predictions, include_subclasses)
6466
else:
6567
raise ValueError(
6668
f"Unexpected annotation found. Found {type(predictions[0].value)}")
@@ -269,3 +271,51 @@ def _ensure_valid_poly(poly):
269271
def _mask_iou(mask1: np.ndarray, mask2: np.ndarray) -> ScalarMetricValue:
270272
"""Computes iou between two binary segmentation masks."""
271273
return np.sum(mask1 & mask2) / np.sum(mask1 | mask2)
274+
275+
276+
def _get_ner_pairs(
    ground_truths: List[ObjectAnnotation], predictions: List[ObjectAnnotation]
) -> List[Tuple[ObjectAnnotation, ObjectAnnotation, ScalarMetricValue]]:
    """Get iou score for all possible pairs of ground truths and predictions"""
    return [(ground_truth, prediction,
             _ner_iou(ground_truth.value, prediction.value))
            for ground_truth, prediction in product(ground_truths, predictions)]
285+
286+
287+
def _ner_iou(ner1: TextEntity, ner2: TextEntity):
288+
"""Computes iou between two text entity annotations"""
289+
intersection_start, intersection_end = max(ner1.start, ner2.start), min(
290+
ner1.end, ner2.end)
291+
union_start, union_end = min(ner1.start,
292+
ner2.start), max(ner1.end, ner2.end)
293+
#edge case of only one character in text
294+
if union_start == union_end:
295+
return 1
296+
#if there is no intersection
297+
if intersection_start > intersection_end:
298+
return 0
299+
return (intersection_end - intersection_start) / (union_end - union_start)
300+
301+
302+
def ner_miou(ground_truths: List[ObjectAnnotation],
             predictions: List[ObjectAnnotation],
             include_subclasses: bool) -> Optional[ScalarMetricValue]:
    """
    Computes iou score for all features with the same feature schema id.
    Calculation includes subclassifications only when `include_subclasses`
    is True.

    Args:
        ground_truths: List of ground truth ner annotations
        predictions: List of prediction ner annotations
        include_subclasses: Whether to factor subclassifications into the iou
    Returns:
        float representing the iou score for the feature type.
        If there are no matches then this returns none
    """
    if has_no_matching_annotations(ground_truths, predictions):
        return 0.
    elif has_no_annotations(ground_truths, predictions):
        return None
    pairs = _get_ner_pairs(ground_truths, predictions)
    return object_pair_miou(pairs, include_subclasses)

tests/data/metrics/confusion_matrix/conftest.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from labelbox.data.annotation_types import Polygon, Point, Rectangle, Mask, MaskData, Line, Radio, Text, Checklist, ClassificationAnswer
77
import numpy as np
88

9+
from labelbox.data.annotation_types.ner import TextEntity
10+
911

1012
class NameSpace(SimpleNamespace):
1113

@@ -84,6 +86,13 @@ def get_checklist(name, answer_names):
8486
]))
8587

8688

89+
def get_ner(name, start, end, subclasses=None):
    """Build an ObjectAnnotation wrapping a TextEntity over [start, end)."""
    classifications = subclasses if subclasses is not None else []
    return ObjectAnnotation(name=name,
                            value=TextEntity(start=start, end=end),
                            classifications=classifications)
94+
95+
8796
def get_object_pairs(tool_fn, **kwargs):
8897
return [
8998
NameSpace(predictions=[tool_fn("cat", **kwargs)],
@@ -326,6 +335,11 @@ def point_pairs():
326335
return get_object_pairs(get_point, x=0, y=0)
327336

328337

338+
@pytest.fixture
def ner_pairs():
    """Standard prediction/ground-truth pairings for a [0, 10) text entity."""
    ner_kwargs = {"start": 0, "end": 10}
    return get_object_pairs(get_ner, **ner_kwargs)
341+
342+
329343
@pytest.fixture()
330344
def pair_iou_thresholds():
331345
return [

tests/data/metrics/confusion_matrix/test_confusion_matrix_data_row.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
fixture_ref('rectangle_pairs'),
1010
fixture_ref('mask_pairs'),
1111
fixture_ref('line_pairs'),
12-
fixture_ref('point_pairs')
12+
fixture_ref('point_pairs'),
13+
fixture_ref('ner_pairs')
1314
])
1415
def test_overlapping_objects(tool_examples):
1516
for example in tool_examples:

tests/data/metrics/confusion_matrix/test_confusion_matrix_feature.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
fixture_ref('rectangle_pairs'),
1010
fixture_ref('mask_pairs'),
1111
fixture_ref('line_pairs'),
12-
fixture_ref('point_pairs')
12+
fixture_ref('point_pairs'),
13+
fixture_ref('ner_pairs')
1314
])
1415
def test_overlapping_objects(tool_examples):
1516
for example in tool_examples:

tests/data/metrics/iou/data_row/conftest.py

Lines changed: 117 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -320,45 +320,42 @@ def empty_radio_prediction():
320320

321321
@pytest.fixture
def matching_checklist():
    """Checklist label and prediction with three identical answers.

    The original dict literal repeated the 'schemaId' key, so the first
    occurrence was silently discarded; the duplicate is removed here.
    """
    return NameSpace(labels=[],
                     classifications=[{
                         'featureId':
                             '1234567890111213141516171',
                         'uuid':
                             '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
                         'schemaId':
                             'ckppid25v0000aeyjmxfwlc7t',
                         'answers': [{
                             'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
                         }, {
                             'schemaId': 'ckppide010001aeyj0yhiaghc'
                         }, {
                             'schemaId': 'ckppidq4u0002aeyjmcc4toxw'
                         }]
                     }],
                     predictions=[{
                         'uuid':
                             '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
                         'schemaId':
                             'ckppid25v0000aeyjmxfwlc7t',
                         'dataRow': {
                             'id': 'ckppihxc10005aeyjen11h7jh'
                         },
                         'answers': [{
                             'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
                         }, {
                             'schemaId': 'ckppide010001aeyj0yhiaghc'
                         }, {
                             'schemaId': 'ckppidq4u0002aeyjmcc4toxw'
                         }]
                     }],
                     data_row_expected=1.,
                     expected={1.0: 3})
362359

363360

364361
@pytest.fixture
@@ -699,3 +696,84 @@ def point_pair():
699696
}
700697
}],
701698
expected=0.879113232477017)
699+
700+
701+
@pytest.fixture
def matching_ner():
    """Label and prediction cover the identical span [0, 10) -> iou of 1."""
    label = {
        'featureId': 'ckppivl7p0006aeyj92cezr9d',
        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
        'format': "text.location",
        'data': {
            "location": {
                "start": 0,
                "end": 10
            }
        }
    }
    prediction = {
        'dataRow': {
            'id': 'ckppihxc10005aeyjen11h7jh'
        },
        'uuid': '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
        "location": {
            "start": 0,
            "end": 10
        }
    }
    return NameSpace(labels=[label], predictions=[prediction], expected=1)
726+
727+
728+
@pytest.fixture
def no_matching_ner():
    """Label [0, 5) and prediction [5, 10) are disjoint -> iou of 0."""
    label = {
        'featureId': 'ckppivl7p0006aeyj92cezr9d',
        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
        'format': "text.location",
        'data': {
            "location": {
                "start": 0,
                "end": 5
            }
        }
    }
    prediction = {
        'dataRow': {
            'id': 'ckppihxc10005aeyjen11h7jh'
        },
        'uuid': '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
        "location": {
            "start": 5,
            "end": 10
        }
    }
    return NameSpace(labels=[label], predictions=[prediction], expected=0)
753+
754+
755+
@pytest.fixture
def partial_matching_ner():
    """Label [0, 7) and prediction [3, 5) overlap by 2 of 7 chars -> iou 2/7."""
    label = {
        'featureId': 'ckppivl7p0006aeyj92cezr9d',
        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
        'format': "text.location",
        'data': {
            "location": {
                "start": 0,
                "end": 7
            }
        }
    }
    prediction = {
        'dataRow': {
            'id': 'ckppihxc10005aeyjen11h7jh'
        },
        'uuid': '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
        "location": {
            "start": 3,
            "end": 5
        }
    }
    return NameSpace(labels=[label],
                     predictions=[prediction],
                     expected=0.2857142857142857)

tests/data/metrics/iou/data_row/test_data_row_iou.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,10 @@ def test_vector_with_subclass(pair):
115115
@parametrize("pair", strings_to_fixtures(["point_pair", "line_pair"]))
116116
def test_others(pair):
117117
check_iou(pair)
118+
119+
120+
_NER_FIXTURES = ["matching_ner", "no_matching_ner", "partial_matching_ner"]


@parametrize("pair", strings_to_fixtures(_NER_FIXTURES))
def test_ner(pair):
    """Check data-row iou values for NER (TextEntity) annotation pairs."""
    check_iou(pair)

0 commit comments

Comments
 (0)