Add NER IoU tests and update the existing IoU calculations for TextEntity annotations

jtsodapop · jtsodapop · commit 885e35b1ccf4 · 2022-03-17T09:38:06.000-04:00
diff --git a/labelbox/data/metrics/iou/calculation.py b/labelbox/data/metrics/iou/calculation.py
@@ -4,12 +4,10 @@
 from shapely.geometry import Polygon
 import numpy as np
 
-from labelbox.data.annotation_types.ner import TextEntity
-
 from ..group import get_feature_pairs, get_identifying_key, has_no_annotations, has_no_matching_annotations
 from ...annotation_types import (ObjectAnnotation, ClassificationAnnotation,
                                  Mask, Geometry, Point, Line, Checklist, Text,
-                                 Radio, ScalarMetricValue)
+                                 TextEntity, Radio, ScalarMetricValue)
 
 
 def miou(ground_truths: List[Union[ObjectAnnotation, ClassificationAnnotation]],
@@ -63,6 +61,8 @@ def feature_miou(ground_truths: List[Union[ObjectAnnotation,
         return vector_miou(ground_truths, predictions, include_subclasses)
     elif isinstance(predictions[0], ClassificationAnnotation):
         return classification_miou(ground_truths, predictions)
+    elif isinstance(predictions[0].value, TextEntity):
+        return ner_miou(ground_truths, predictions, include_subclasses)
     else:
         raise ValueError(
             f"Unexpected annotation found. Found {type(predictions[0].value)}")
@@ -293,4 +293,29 @@ def _ner_iou(ner1: TextEntity, ner2: TextEntity):
     #edge case of only one character in text
     if union_start == union_end:
         return 1
-    return (intersection_end - intersection_start) / (union_end - union_start)
+    #if there is no intersection
+    if intersection_start > intersection_end:
+        return 0
+    return (intersection_end - intersection_start) / (union_end - union_start)
+
+
+def ner_miou(ground_truths: List[ObjectAnnotation],
+             predictions: List[ObjectAnnotation],
+             include_subclasses: bool) -> Optional[ScalarMetricValue]:
+    """
+    Computes iou score for ner features with the same feature schema id.
+
+    Args:
+        ground_truths: List of ground truth ner annotations
+        predictions: List of prediction ner annotations
+        include_subclasses: Passed through unchanged to object_pair_miou
+    Returns:
+        0. if has_no_matching_annotations is true, else None if
+        has_no_annotations is true, else the result of object_pair_miou.
+    """
+    if has_no_matching_annotations(ground_truths, predictions):
+        return 0.
+    elif has_no_annotations(ground_truths, predictions):
+        return None
+    pairs = _get_ner_pairs(ground_truths, predictions)
+    return object_pair_miou(pairs, include_subclasses)
diff --git a/tests/data/metrics/iou/data_row/conftest.py b/tests/data/metrics/iou/data_row/conftest.py
@@ -320,45 +320,42 @@ def empty_radio_prediction():
 
 @pytest.fixture
 def matching_checklist():
-    return NameSpace(
-        labels=[],
-        classifications=[{
-            'featureId':
-                '1234567890111213141516171',
-            'schemaId':
-                'ckppid25v0000aeyjmxfwlc7t',
-            'uuid':
-                '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
-            'schemaId':
-                'ckppid25v0000aeyjmxfwlc7t',
-            'answers': [{
-                'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
-            }, {
-                'schemaId': 'ckppide010001aeyj0yhiaghc'
-            }, {
-                'schemaId': 'ckppidq4u0002aeyjmcc4toxw'
-            }]
-        }],
-        predictions=[{
-            'uuid':
-                '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
-            'schemaId':
-                'ckppid25v0000aeyjmxfwlc7t',
-            'dataRow': {
-                'id': 'ckppihxc10005aeyjen11h7jh'
-            },
-            'answers': [{
-                'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
-            }, {
-                'schemaId': 'ckppide010001aeyj0yhiaghc'
-            }, {
-                'schemaId': 'ckppidq4u0002aeyjmcc4toxw'
-            }]
-        }],
-        data_row_expected=1.,
-        #  expected = [1.]
-        #  expected=[1., 1., 1.])
-        expected={1.0: 3})
+    return NameSpace(labels=[],  # classification and prediction list the same three answer schemaIds
+                     classifications=[{
+                         'featureId':
+                             '1234567890111213141516171',
+                         'schemaId':
+                             'ckppid25v0000aeyjmxfwlc7t',
+                         'uuid':
+                             '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
+                         'schemaId':  # NOTE(review): duplicate of the 'schemaId' key above (same value); one entry could be dropped
+                             'ckppid25v0000aeyjmxfwlc7t',
+                         'answers': [{
+                             'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+                         }, {
+                             'schemaId': 'ckppide010001aeyj0yhiaghc'
+                         }, {
+                             'schemaId': 'ckppidq4u0002aeyjmcc4toxw'
+                         }]
+                     }],
+                     predictions=[{
+                         'uuid':
+                             '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
+                         'schemaId':
+                             'ckppid25v0000aeyjmxfwlc7t',
+                         'dataRow': {
+                             'id': 'ckppihxc10005aeyjen11h7jh'
+                         },
+                         'answers': [{
+                             'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+                         }, {
+                             'schemaId': 'ckppide010001aeyj0yhiaghc'
+                         }, {
+                             'schemaId': 'ckppidq4u0002aeyjmcc4toxw'
+                         }]
+                     }],
+                     data_row_expected=1.,
+                     expected={1.0: 3})
 
 
 @pytest.fixture
@@ -699,3 +696,84 @@ def point_pair():
                          }
                      }],
                      expected=0.879113232477017)
+
+
+@pytest.fixture
+def matching_ner():  # label and prediction share a schemaId and the same span (0-10)
+    return NameSpace(labels=[{
+        'featureId': 'ckppivl7p0006aeyj92cezr9d',
+        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+        'format': "text.location",
+        'data': {
+            "location": {
+                "start": 0,
+                "end": 10
+            }
+        }
+    }],
+                     predictions=[{
+                         'dataRow': {
+                             'id': 'ckppihxc10005aeyjen11h7jh'
+                         },
+                         'uuid': '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
+                         'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+                         "location": {
+                             "start": 0,
+                             "end": 10
+                         }
+                     }],
+                     expected=1)  # identical spans, so the expected score is 1
+
+
+@pytest.fixture
+def no_matching_ner():  # label span 0-5 and prediction span 5-10 meet only at offset 5
+    return NameSpace(labels=[{
+        'featureId': 'ckppivl7p0006aeyj92cezr9d',
+        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+        'format': "text.location",
+        'data': {
+            "location": {
+                "start": 0,
+                "end": 5
+            }
+        }
+    }],
+                     predictions=[{
+                         'dataRow': {
+                             'id': 'ckppihxc10005aeyjen11h7jh'
+                         },
+                         'uuid': '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
+                         'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+                         "location": {
+                             "start": 5,
+                             "end": 10
+                         }
+                     }],
+                     expected=0)  # the spans share no length, so the expected score is 0
+
+
+@pytest.fixture
+def partial_matching_ner():  # prediction span 3-5 lies inside label span 0-7
+    return NameSpace(labels=[{
+        'featureId': 'ckppivl7p0006aeyj92cezr9d',
+        'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+        'format': "text.location",
+        'data': {
+            "location": {
+                "start": 0,
+                "end": 7
+            }
+        }
+    }],
+                     predictions=[{
+                         'dataRow': {
+                             'id': 'ckppihxc10005aeyjen11h7jh'
+                         },
+                         'uuid': '76e0dcea-fe46-43e5-95f5-a5e3f378520a',
+                         'schemaId': 'ckppid25v0000aeyjmxfwlc7t',
+                         "location": {
+                             "start": 3,
+                             "end": 5
+                         }
+                     }],
+                     expected=0.2857142857142857)  # equals 2/7; presumably overlap 2 over union 7 - confirm against _ner_iou
diff --git a/tests/data/metrics/iou/data_row/test_data_row_iou.py b/tests/data/metrics/iou/data_row/test_data_row_iou.py
@@ -115,3 +115,10 @@ def test_vector_with_subclass(pair):
 @parametrize("pair", strings_to_fixtures(["point_pair", "line_pair"]))
 def test_others(pair):
     check_iou(pair)
+
+
+@parametrize("pair",
+             strings_to_fixtures(
+                 ["matching_ner", "no_matching_ner", "partial_matching_ner"]))
+def test_ner(pair):  # presumably invoked once per NER fixture named above; verify parametrize/strings_to_fixtures
+    check_iou(pair)  # same helper that test_others calls