Commit 141a050

Author: Matt Sokoloff
Commit message: wip
1 parent: 5d57910

File tree

4 files changed: +152 -25 lines changed

labelbox/data/annotation_types/annotation.py

Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@ class BaseAnnotation(FeatureSchema):
 
 class ClassificationAnnotation(BaseAnnotation):
     """Class representing classification annotations (annotations that don't have a location) """
+
     value: Union[Text, Checklist, Radio, Dropdown]

labelbox/data/metrics/iou.py

Lines changed: 142 additions & 24 deletions

@@ -1,4 +1,5 @@
 # type: ignore
+from labelbox.data.annotation_types.metrics.scalar import CustomScalarMetric
 from typing import Dict, List, Optional, Tuple, Union
 from shapely.geometry import Polygon
 from itertools import product
@@ -10,7 +11,73 @@
                     Line, Checklist, Text, Radio)
 
 
-def data_row_miou(ground_truth: Label, prediction: Label) -> Optional[float]:
+
+def subclass_ious(ground_truth: Label, prediction: Label) -> Dict[str, Optional[float]]:
+    """
+    This function effectively flattens all Label classes and computes the iou.
+    Text is ignored for this function.
+    So for Radio or Checklist, if you have an animal detection model and the model predicts:
+        Polygon - cat
+        Radio - orange
+        Checklist - fluffy
+
+    this all gets grouped into one category, cat:orange:fluffy, and the whole key has to match.
+
+    The most appropriate use case for this is if you have one radio subclass that you prefer to treat as top level.
+    Otherwise this function is a bit naive; if you want something that specifically suits
+    your use case, create a new function based off this one.
+    """
+    prediction_annotations = _create_feature_lookup(prediction.annotations)
+    ground_truth_annotations = _create_feature_lookup(ground_truth.annotations)
+    feature_schemas = set(prediction_annotations.keys()).union(
+        set(ground_truth_annotations.keys()))
+
+    def _create_classification_feature_lookup(annotations: Union[List[ObjectAnnotation], List[ClassificationAnnotation]]):
+        # Note that these annotations should all be of the same type..
+        if not len(annotations) or isinstance(annotations[0], ClassificationAnnotation):
+            return annotations
+
+        grouped_annotations = defaultdict(list)
+        for annotation in annotations:
+            row = []
+            classifications = [classification.value for classification in annotation.classifications if isinstance(classification.value, Radio)]
+            classifications = [classification.answer.name or classification.answer.feature_schema_id for classification in classifications]
+            # TODO: create the lookup
+            grouped_annotations[annotation.name or annotation.feature_schema_id].append(annotation)
+
+        return grouped_annotations
+
+    ious = []
+    for key in feature_schemas:
+        # We shouldn't have any Nones, since the keys are generated by the presence of the objects.
+        prediction_annotations = prediction_annotations[key]
+        ground_truth_annotations =
+
+
+def feature_miou(ground_truth: Label, prediction: Label) -> List[CustomScalarMetric]:
+    return [
+        CustomScalarMetric(metric_name="iou", metric_value=value, feature_name=name)
+        for name, value in get_iou_across_features(ground_truth.annotations, prediction.annotations)
+        if value is not None
+    ]
+
+
+# TODO: What should we call this?
+# We should be returning these objects..
+def data_row_miou_v2(ground_truth: Label, prediction: Label, include_subclasses=True) -> List[CustomScalarMetric]:
+    return CustomScalarMetric(
+        metric_name="iou",
+        metric_value=data_row_miou(ground_truth=ground_truth, prediction=prediction, include_subclasses=include_subclasses)
+    )
+
+
+def data_row_miou(ground_truth: Label, prediction: Label, include_subclasses=True) -> Optional[float]:
     """
     Calculate iou for two labels corresponding to the same data row.
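As a rough illustration of the flattening described in the subclass_ious docstring above, here is a self-contained sketch that builds a composite key from an object's name plus its Radio/Checklist answers. The helper names and the plain-dict annotation shape are hypothetical stand-ins, not labelbox API.

# Hypothetical sketch (not part of this commit): flatten an object's name plus
# its subclass answers into one composite key such as "cat:orange:fluffy",
# so grouped IoU only counts a match when the whole key matches.
from typing import Dict, List


def flatten_key(name: str, subclass_answers: List[str]) -> str:
    # flatten_key("cat", ["orange", "fluffy"]) -> "cat:orange:fluffy"
    return ":".join([name] + subclass_answers)


def group_by_flattened_key(annotations: List[dict]) -> Dict[str, List[dict]]:
    # Plain dicts stand in for ObjectAnnotations here:
    # {"name": "cat", "answers": ["orange", "fluffy"]}
    grouped: Dict[str, List[dict]] = {}
    for annotation in annotations:
        key = flatten_key(annotation["name"], annotation.get("answers", []))
        grouped.setdefault(key, []).append(annotation)
    return grouped


print(group_by_flattened_key([{"name": "cat", "answers": ["orange", "fluffy"]}]))
# {'cat:orange:fluffy': [{'name': 'cat', 'answers': ['orange', 'fluffy']}]}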
@@ -21,13 +88,20 @@ def data_row_miou(ground_truth: Label, prediction: Label) -> Optional[float]:
         float indicating the iou score for this data row.
         Returns None if there are no annotations in ground_truth or prediction Labels
     """
-    return get_iou_across_features(ground_truth.annotations,
-                                   prediction.annotations)
+    feature_ious = get_iou_across_features(ground_truth.annotations,
+                                           prediction.annotations, include_subclasses)
+    return average_ious(feature_ious)
+
+
+def average_ious(feature_ious: Dict[str, Optional[float]]) -> Optional[float]:
+    ious = [iou for iou in feature_ious.values() if iou is not None]
+    return None if not len(ious) else np.mean(ious)
 
 
 def get_iou_across_features(
     ground_truths: List[Union[ObjectAnnotation, ClassificationAnnotation]],
-    predictions: List[Union[ObjectAnnotation, ClassificationAnnotation]]
+    predictions: List[Union[ObjectAnnotation, ClassificationAnnotation]],
+    include_subclasses=True
 ) -> Optional[float]:
     """
     Groups annotations by feature_schema_id or name (whichever is available), calculates iou scores and returns the mean across all features.
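To make the averaging rule concrete, the minimal sketch below mirrors the average_ious helper added above: features whose IoU is None (no annotations on either side) are dropped before taking the mean. Only numpy is needed; no labelbox types are assumed.

# Minimal sketch of the averaging rule used by average_ious: None entries
# are ignored and the remaining per-feature IoUs are averaged.
from typing import Dict, Optional

import numpy as np


def average_ious(feature_ious: Dict[str, Optional[float]]) -> Optional[float]:
    ious = [iou for iou in feature_ious.values() if iou is not None]
    return None if not len(ious) else float(np.mean(ious))


# Example: "dog" had no annotations on either side, so it does not drag the mean down.
print(average_ious({"cat": 0.75, "tree": 0.25, "dog": None}))  # 0.5
print(average_ious({"dog": None}))                             # None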
@@ -43,18 +117,21 @@ def get_iou_across_features(
     ground_truth_annotations = _create_feature_lookup(ground_truths)
     feature_schemas = set(prediction_annotations.keys()).union(
         set(ground_truth_annotations.keys()))
-    ious = [
-        feature_miou(ground_truth_annotations[feature_schema],
-                     prediction_annotations[feature_schema])
+    ious = {
+        feature_schema: feature_miou(ground_truth_annotations[feature_schema],
+                                     prediction_annotations[feature_schema], include_subclasses)
         for feature_schema in feature_schemas
-    ]
-    ious = [iou for iou in ious if iou is not None]
-    return None if not len(ious) else np.mean(ious)
+    }
+    return ious
+    #ious = [iou for iou in ious if iou is not None]  # TODO: What causes this to be None?
+
+    return  #None if not len(ious) else np.mean(ious)
 
 
 def feature_miou(
     ground_truths: List[Union[ObjectAnnotation, ClassificationAnnotation]],
     predictions: List[Union[ObjectAnnotation, ClassificationAnnotation]],
+    include_subclasses: bool
 ) -> Optional[float]:
     """
     Computes iou score for all features with the same feature schema id.
@@ -66,15 +143,19 @@ def feature_miou(
         float representing the iou score for the feature type if score can be computed otherwise None.
     """
     if len(ground_truths) and not len(predictions):
-        # No existing predictions but existing labels means no matches.
+        # No existing predictions but existing ground truths means no matches.
+        return 0.
+    elif not len(ground_truths) and len(predictions):
+        # No ground truth annotations but there are predictions means no matches.
         return 0.
     elif not len(ground_truths) and not len(predictions):
-        # Ignore examples that do not have any labels or predictions
-        return
+        # Ignore examples that do not have any annotations or predictions.
+        # This could maybe be counted as correct but could also skew the stats..
+        return  # Undefined (neither wrong nor right)
     elif isinstance(predictions[0].value, Mask):
-        return mask_miou(ground_truths, predictions)
+        return mask_miou(ground_truths, predictions, include_subclasses)
     elif isinstance(predictions[0].value, Geometry):
-        return vector_miou(ground_truths, predictions)
+        return vector_miou(ground_truths, predictions, include_subclasses)
     elif isinstance(predictions[0], ClassificationAnnotation):
         return classification_miou(ground_truths, predictions)
     else:
@@ -84,7 +165,7 @@ def feature_miou(
 
 def vector_miou(ground_truths: List[ObjectAnnotation],
                 predictions: List[ObjectAnnotation],
-                buffer=70.) -> float:
+                buffer=70., include_subclasses=True) -> float:
     """
     Computes iou score for all features with the same feature schema id.
     Calculation includes subclassifications.
@@ -105,10 +186,13 @@ def vector_miou(ground_truths: List[ObjectAnnotation],
         if id(prediction) not in solution_features and id(
                 ground_truth) not in solution_features:
             solution_features.update({id(prediction), id(ground_truth)})
-            classification_iou = get_iou_across_features(
-                prediction.classifications, ground_truth.classifications)
-            classification_iou = classification_iou if classification_iou is not None else agreement
-            solution_agreements.append((agreement + classification_iou) / 2.)
+            if include_subclasses:
+                classification_iou = average_ious(get_iou_across_features(
+                    prediction.classifications, ground_truth.classifications))
+                classification_iou = classification_iou if classification_iou is not None else agreement
+                solution_agreements.append((agreement + classification_iou) / 2.)
+            else:
+                solution_agreements.append(agreement)
 
     # Add zeros for unmatched Features
     solution_agreements.extend([0.0] *
@@ -117,7 +201,7 @@ def vector_miou(ground_truths: List[ObjectAnnotation],
 
 
 def mask_miou(ground_truths: List[ObjectAnnotation],
-              predictions: List[ObjectAnnotation]) -> float:
+              predictions: List[ObjectAnnotation], include_subclasses=True) -> float:
     """
     Computes iou score for all features with the same feature schema id.
     Calculation includes subclassifications.
@@ -138,6 +222,10 @@ def mask_miou(ground_truths: List[ObjectAnnotation],
             "Prediction and mask must have the same shape."
             f" Found {prediction_np.shape}/{ground_truth_np.shape}.")
 
+    agreement = _mask_iou(ground_truth_np, prediction_np)
+    if not include_subclasses:
+        return agreement
+
     prediction_classifications = []
     for prediction in predictions:
         prediction_classifications.extend(prediction.classifications)
@@ -147,7 +235,7 @@ def mask_miou(ground_truths: List[ObjectAnnotation],
 
     classification_iou = get_iou_across_features(ground_truth_classifications,
                                                  prediction_classifications)
-    agreement = _mask_iou(ground_truth_np, prediction_np)
+
    classification_iou = classification_iou if classification_iou is not None else agreement
     return (agreement + classification_iou) / 2.
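The same subclass weighting appears in both vector_miou and mask_miou above: when subclasses are included, the geometric IoU and the classification IoU are averaged with equal weight, and the classification term falls back to the geometric agreement when it is undefined. The helper below is a hypothetical distillation of that rule, not a function from the diff.

# Minimal sketch of the subclass weighting used in vector_miou/mask_miou.
from typing import Optional


def combine_agreement(agreement: float,
                      classification_iou: Optional[float],
                      include_subclasses: bool = True) -> float:
    # Skip the subclass term entirely when subclasses are excluded.
    if not include_subclasses:
        return agreement
    # Fall back to the geometric agreement when the subclass IoU is undefined.
    if classification_iou is None:
        classification_iou = agreement
    return (agreement + classification_iou) / 2.


print(combine_agreement(0.9, 0.5))         # 0.7
print(combine_agreement(0.9, None))        # 0.9
print(combine_agreement(0.9, 0.5, False))  # 0.9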

@@ -225,10 +313,40 @@ def _create_feature_lookup(
         and the value is a list of annotations that have that feature_schema_id (or name)
 
     """
+    # TODO: Add a check here.
+    """
+
+    We don't want to select name for one and then feature_schema_id for the other.
+    I think we should check that in another function.
+
+    Do we want to require that the user provides the feature name?
+    We don't really want schema ids showing up in the metric names..
+
+    So:
+
+    Also add a test.
+    ####
+    all_schema_ids_defined_pred, all_names_defined_pred = check_references(pred_annotations)
+    if (not all_schema_ids_defined_pred and not all_names_defined_pred):
+        raise ValueError("All data must have feature_schema_ids or names set")
+
+
+    all_schema_ids_defined_gt, all_names_defined_gt = check_references(gt_annotations)
+
+    # Prefer name because the user will be able to know what it means.
+    # Use the schema id in case the name doesn't exist..
+    if (all_names_defined_pred and all_names_defined_gt):
+        return 'name'
+    elif all_schema_ids_defined_pred and all_schema_ids_defined_gt:
+        return 'feature_schema_id'
+    else:
+        raise ValueError("Ground truth and prediction annotations must all have names or feature schema ids set. Otherwise there is no key to match on. Please update.")
+    """
     grouped_annotations = defaultdict(list)
     for annotation in annotations:
-        grouped_annotations[annotation.feature_schema_id or
-                            annotation.name].append(annotation)
+        grouped_annotations[annotation.name or
+                            annotation.feature_schema_id].append(annotation)
+
     return grouped_annotations
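The hunk above swaps the lookup key preference from feature_schema_id to the human-readable name. The sketch below shows the effect of that keying rule with a hypothetical stand-in class; it does not use the labelbox annotation classes themselves.

# Self-contained sketch of the grouping rule _create_feature_lookup now uses:
# prefer the name as the key, fall back to the feature_schema_id.
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class FakeAnnotation:  # hypothetical stand-in, not a labelbox class
    name: Optional[str]
    feature_schema_id: Optional[str]


def create_feature_lookup(annotations: List[FakeAnnotation]) -> Dict[str, List[FakeAnnotation]]:
    grouped = defaultdict(list)
    for annotation in annotations:
        grouped[annotation.name or annotation.feature_schema_id].append(annotation)
    return grouped


lookup = create_feature_lookup([
    FakeAnnotation(name="cat", feature_schema_id="ckx1"),
    FakeAnnotation(name=None, feature_schema_id="ckx2"),
])
print(sorted(lookup.keys()))  # ['cat', 'ckx2']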

234352

tests/data/metrics/conftest.py

Lines changed: 8 additions & 0 deletions

@@ -675,3 +675,11 @@ def point_pair():
             }
         }],
         expected=0.879113232477017)
+
+
+# TODO:
+# Test no predictions and no labels.
+# We want the behavior to be:
+# (len(predictions) == 0 and len(labels) > 0) == 0
+# (len(predictions) > 0 and len(labels) == 0) == 0
+# (len(predictions) == 0 and len(labels) == 0) == None
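To pin down the behavior listed in the TODO above, a test along these lines could work. The score stub and the list-of-strings inputs are placeholders: a real test would build labelbox annotations via the existing fixtures and call the metric under test (e.g. feature_miou) instead.

# Hypothetical pytest sketch of the edge-case contract from the TODO.
from typing import List, Optional

import pytest


def score(ground_truths: List, predictions: List) -> Optional[float]:
    # Placeholder implementing just the edge-case contract.
    if not ground_truths and not predictions:
        return None
    if not ground_truths or not predictions:
        return 0.
    return 1.  # real IoU computation would go here


@pytest.mark.parametrize("ground_truths,predictions,expected", [
    ([], ["pred"], 0.),   # predictions but no labels -> 0
    (["gt"], [], 0.),     # labels but no predictions -> 0
    ([], [], None),       # neither -> undefined
])
def test_empty_cases(ground_truths, predictions, expected):
    assert score(ground_truths, predictions) == expected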

tests/data/serialization/ndjson/test_metric.py

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ def test_metric():
     list(LBV1Converter.serialize(label_list))
 
 
-def test_metric():
+def test_custom_metric():
     with open('tests/data/assets/ndjson/custom_scalar_import.json',
               'r') as file:
         data = json.load(file)
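The rename matters because Python keeps only the last binding of a name at module level: with two functions both called test_metric, pytest collects only the second one and the first silently never runs. A minimal illustration:

def test_metric():
    assert True  # first definition

def test_metric():       # redefinition shadows the one above
    assert True  # second definition

# Only the second definition survives in the module namespace, so pytest would
# collect a single test named test_metric. Renaming the second function to
# test_custom_metric keeps both tests collectible.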
