
Commit 201b55b

Matt Sokoloff committed: wip

1 parent e86770d commit 201b55b

File tree: 1 file changed, +60 -118 lines changed

labelbox/data/metrics/iou.py (60 additions, 118 deletions)
@@ -10,64 +10,33 @@
     ClassificationAnnotation, Mask, Geometry, Point,
     Line, Checklist, Text, Radio)
 
+from .utils import get_lookup_pair
 
 
-def subclass_ious(ground_truth: Label, prediction: Label) -> Dict[str, Optional[float]]:
-    """
-    This function effectively flattens all Label classes and computes the iou.
-    Text is ignored for this function.
-    So for Radio or Checklist, if you have an animal detection model and the model predicts:
-        Polygon - cat
-        Radio - orange
-        Checklist - fluffy
-
-    this all gets grouped into one category cat:orange:fluffy,
-    and it has to match.
-
-    The most appropriate use case for this is if you have one radio subclass that you prefer to treat as top level.
-    Otherwise this function is a bit naive, and if you want something that specifically suits
-    your use case, create a new function based off this one.
-    """
-    identifying = get_identifying_key(prediction.annotations, ground_truth.annotations)
-    prediction_annotations = _create_feature_lookup(prediction.annotations)
-    ground_truth_annotations = _create_feature_lookup(ground_truth.annotations)
-    feature_schemas = set(prediction_annotations.keys()).union(
-        set(ground_truth_annotations.keys()))
-
-
-def _create_classification_feature_lookup(annotations: Union[List[ObjectAnnotation], List[ClassificationAnnotation]]):
-    # Note that these annotations should all be of the same type.
+"""
+Instead of these functions accepting labels, they should accept annotations.
+Then we can add a helper for applying functions across pred and label combinations.
 
-    if not len(annotations) or isinstance(annotations[0], ClassificationAnnotation):
-        return annotations
-
-    grouped_annotations = defaultdict(list)
-    for annotation in annotations:
-        row = []
-        classifications = [classification.value for classification in annotation.classifications if isinstance(classification.value, Radio)]
-        classifications = [classification.answer.name or classification.answer.feature_schema_id for classification in classifications]
-        # TODO: create the lookup
-        grouped_annotations[annotation.name or annotation.feature_schema_id].append(annotation)
-    return grouped_annotations
+We will get stats for each, and the stats will support flattening.
 
 
-    #ious = []
-    #for key in feature_schemas:
-        # We shouldn't have any Nones, since the keys are generated by the presence of the object.
-        #prediction_annotations = prediction_annotations[key]
-        # #ground_truth_annotations =
-
+data_row_iou()
 
+Is it even possible to return a None? If both are None then they won't have keys.
+"""
 
-def feature_miou(ground_truth: Label, prediction: Label) -> List[CustomScalarMetric]:
+def feature_miou(
+        ground_truth: List[Union[ObjectAnnotation, ClassificationAnnotation]],
+        prediction: List[Union[ObjectAnnotation, ClassificationAnnotation]]) -> List[CustomScalarMetric]:
+    # Classifications are supported because we just take a naive approach to them.
     return [
         CustomScalarMetric(metric_name="iou", metric_value=value, feature_name=name)
-        for name, value in get_iou_across_features(ground_truth.annotations, prediction.annotations)
+        for name, value in get_iou_across_features(ground_truth, prediction)
         if value is not None
     ]
 
 
+
 # TODO: What should we call this?
 # We should be returning these objects..
 def data_row_miou_v2(ground_truth: Label, prediction: Label, include_subclasses=True) -> List[CustomScalarMetric]:
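Note: feature_miou now takes annotation lists instead of whole Labels and returns one CustomScalarMetric per feature. A minimal usage sketch, assuming gt_label and pred_label are existing Label objects (the variable names are illustrative):

    # Callers now pass annotation lists rather than whole Labels.
    metrics = feature_miou(gt_label.annotations, pred_label.annotations)
    for metric in metrics:
        # Each entry carries metric_name="iou", a float metric_value,
        # and the feature_name it was computed for.
        print(metric.feature_name, metric.metric_value)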
@@ -93,6 +62,47 @@ def data_row_miou(ground_truth: Label, prediction: Label, include_subclasses = T
     return average_ious(feature_ious)
 
 
+def subclass_ious(ground_truth: Label, prediction: Label) -> Dict[str, Optional[float]]:
+    """
+    This function effectively flattens all Label classes and computes the iou.
+    Text is ignored for this function.
+    So for Radio or Checklist, if you have an animal detection model and the model predicts:
+        Polygon - cat
+        Radio - orange
+        Checklist - fluffy
+
+    this all gets grouped into one category cat:orange:fluffy,
+    and it has to match.
+
+    The most appropriate use case for this is if you have one radio subclass that you prefer to treat as top level.
+    Otherwise this function is a bit naive, and if you want something that specifically suits
+    your use case, create a new function based off this one.
+    """
+
+    prediction_annotations, ground_truth_annotations, keys = get_lookup_pair(prediction.annotations, ground_truth.annotations)
+
+    def _create_classification_feature_lookup(annotations: Union[List[ObjectAnnotation], List[ClassificationAnnotation]]):
+        # Note that these annotations should all be of the same type.
+        if not len(annotations) or isinstance(annotations[0], ClassificationAnnotation):
+            return annotations
+
+    ious = []
+    for key in keys:
+        # We shouldn't have any Nones, since the keys are generated by the presence of the object.
+        [classification.value.answer for classification in annotation.classifications if isinstance(classification.value, Radio)]
+        prediction_annotations = prediction_annotations[key]
+        gt_annotations = gt_annotations[key]
+
+
 def average_ious(feature_ious: Dict[str, Optional[float]]) -> Optional[float]:
     ious = [iou for iou in feature_ious.values() if iou is not None]
     return None if not len(ious) else np.mean(ious)
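Note: the cat:orange:fluffy grouping described in the subclass_ious docstring amounts to building a compound key from the object's name plus its Radio/Checklist answers. A hypothetical sketch of that flattening (the helper name _flattened_key is not part of this commit, and it assumes Radio.answer is a single answer object while Checklist.answer is a list):

    def _flattened_key(annotation: ObjectAnnotation) -> str:
        # Join the top-level feature name with its Radio / Checklist
        # subclass answers, e.g. "cat:orange:fluffy". Text is ignored,
        # per the docstring above.
        parts = [annotation.name or annotation.feature_schema_id]
        for classification in annotation.classifications:
            value = classification.value
            if isinstance(value, Radio):
                parts.append(value.answer.name or value.answer.feature_schema_id)
            elif isinstance(value, Checklist):
                parts.extend(answer.name or answer.feature_schema_id
                             for answer in value.answer)
        return ":".join(parts)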
@@ -113,19 +123,14 @@ def get_iou_across_features(
         float indicating the iou score for all features represented in the annotations passed to this function.
         Returns None if there are no annotations in ground_truth or prediction annotations
     """
-    prediction_annotations = _create_feature_lookup(predictions)
-    ground_truth_annotations = _create_feature_lookup(ground_truths)
-    feature_schemas = set(prediction_annotations.keys()).union(
-        set(ground_truth_annotations.keys()))
+    prediction_annotations, ground_truth_annotations, keys = get_lookup_pair(predictions, ground_truths)
     ious = {
-        feature_schema: feature_miou(ground_truth_annotations[feature_schema],
-                                     prediction_annotations[feature_schema], include_subclasses)
-        for feature_schema in feature_schemas
+        key: feature_miou(ground_truth_annotations[key],
+                          prediction_annotations[key], include_subclasses)
+        for key in keys
     }
     return ious
-    #ious = [iou for iou in ious if iou is not None] # TODO: What causes this to be None?
 
-    return #None if not len(ious) else np.mean(ious)
 
 
 def feature_miou(
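Note: get_lookup_pair is imported from .utils but its implementation is not part of this diff. Judging from how it is consumed here, it presumably merges the removed _create_feature_lookup grouping with the key-union logic. A sketch under that assumption:

    from collections import defaultdict

    def get_lookup_pair(predictions, ground_truths):
        # Assumed behavior: group each annotation list by feature name
        # (falling back to feature_schema_id) and return both lookups
        # plus the union of their keys.
        def _lookup(annotations):
            grouped = defaultdict(list)
            for annotation in annotations:
                grouped[annotation.name or annotation.feature_schema_id].append(annotation)
            return grouped

        prediction_lookup = _lookup(predictions)
        ground_truth_lookup = _lookup(ground_truths)
        keys = set(prediction_lookup) | set(ground_truth_lookup)
        return prediction_lookup, ground_truth_lookup, keys

Returning defaultdicts would keep the ground_truth_annotations[key] and prediction_annotations[key] lookups above safe for keys that appear on only one side.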
@@ -148,7 +153,7 @@ def feature_miou(
     elif not len(ground_truths) and len(predictions):
         # No ground truth annotations but there are predictions means no matches
         return 0.
-    elif not len(ground_truths) and not len(predictions):
+    elif not len(ground_truths) and not len(predictions):  # TODO: This shouldn't run at all for subclasses. Otherwise it should return 1.
         # Ignore examples that do not have any annotations or predictions
         # This could maybe be counted as correct but could also skew the stats..
         return  # Undefined (neither wrong nor right.)
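Note: the empty-input branches above can be summarized as a small truth table (the missed-detection row assumes the symmetric branch earlier in this function, which is not shown in this hunk):

    # ground_truths | predictions | result
    # --------------+-------------+------------------------------------------
    # non-empty     | non-empty   | computed iou for the feature
    # non-empty     | empty       | 0.   (everything was missed)
    # empty         | non-empty   | 0.   (predictions are all false positives)
    # empty         | empty       | None (undefined; callers filter None out)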
@@ -300,29 +305,6 @@ def checklist_iou(ground_truth: Checklist, prediction: Checklist) -> float:
                len(schema_ids_label | schema_ids_pred))
 
 
-def _create_feature_lookup(
-    annotations: List[Union[ObjectAnnotation, ClassificationAnnotation]]
-) -> Dict[str, List[Union[ObjectAnnotation, ClassificationAnnotation]]]:
-    """
-    Groups annotations by schema id (if available, otherwise name).
-
-    Args:
-        annotations: List of annotations to group
-    Returns:
-        a dict where each key is the feature_schema_id (or name)
-        and the value is a list of annotations that have that feature_schema_id (or name)
-    """
-    # TODO: Add a check here.
-
-    grouped_annotations = defaultdict(list)
-    for annotation in annotations:
-        grouped_annotations[annotation.name or
-                            annotation.feature_schema_id].append(annotation)
-
-    return grouped_annotations
-
-
 def _get_vector_pairs(
     ground_truths: List[ObjectAnnotation],
     predictions: List[ObjectAnnotation], buffer: float
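Note: for reference, the removed _create_feature_lookup produced exactly the per-side grouping that get_lookup_pair now presumably builds for both sides at once. An illustrative call (annotation values are made up):

    # annotations = [ObjectAnnotation(name="cat", ...),
    #                ObjectAnnotation(name="dog", ...),
    #                ObjectAnnotation(name="cat", ...)]
    # _create_feature_lookup(annotations)
    # -> {"cat": [<cat #1>, <cat #2>], "dog": [<dog>]}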
@@ -356,44 +338,4 @@ def _mask_iou(mask1: np.ndarray, mask2: np.ndarray) -> float:
     return np.sum(mask1 & mask2) / np.sum(mask1 | mask2)
 
 
-def all_have_key(annotations: List[FeatureSchema]) -> Tuple[bool, bool]:
-    """
-    We want to make sure that all feature schemas have names set or feature_schema_ids set.
-    """
-    all_names = True
-    all_schemas = True
-    for annotation in annotations:
-        if annotation.name is None:
-            all_names = False
-        if annotation.feature_schema_id is None:
-            all_schemas = False
-    return all_schemas, all_names
-
-
-def get_identifying_key(pred_annotations, gt_annotations):
-    """
-    We don't want to select name for one and then feature_schema_id for the other.
-    I think we should check this in another function.
-
-    Do we want to require that the user provides the feature name?
-    We don't really want schema ids showing up in the metric names..
-    """
-    # TODO: Also add a test.
-    all_schema_ids_defined_pred, all_names_defined_pred = all_have_key(pred_annotations)
-    if (not all_schema_ids_defined_pred and not all_names_defined_pred):
-        raise ValueError("All data must have feature_schema_ids or names set")
-
-    all_schema_ids_defined_gt, all_names_defined_gt = all_have_key(gt_annotations)
-
-    # Prefer name because the user will be able to know what it means.
-    # Use schema id in case that doesn't exist.
-    if (all_names_defined_pred and all_names_defined_gt):
-        return 'name'
-    elif all_schema_ids_defined_pred and all_schema_ids_defined_gt:
-        return 'feature_schema_id'
-    else:
-        raise ValueError("Ground truth and prediction annotations must all have names or feature_schema_ids set. Otherwise there is no key to match on. Please update.")
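Note: the removed key-selection logic reduces to: prefer human-readable names, fall back to feature_schema_ids, and raise when neither field is consistently set on both sides. A usage sketch of the removed helper (whether it reappears in .utils is not shown in this diff):

    key = get_identifying_key(pred_annotations, gt_annotations)
    # -> 'name'               when every annotation on both sides has a name
    # -> 'feature_schema_id'  when schema ids are the consistently set field
    # -> raises ValueError    when neither field is set across all annotations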
