[Validate] Introduce better filtering errors (#321)

gatli · web-flow · commit 8eac8281b229 · 2022-06-21T15:13:32.000+02:00
* Raise error if everything is filtered

* Fix error reporting

* Bump and changelog
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,4 +1,4 @@
-fail_fast: true
+fail_fast: false
 repos:
 -   repo: local
     hooks:
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.14.2](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.2) - 2022-06-21
+
+### Fixed
+- Raise a descriptive `EverythingFilteredError` when the filters of a Validate evaluation function filter out every annotation or every prediction
+
 ## [0.14.1](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.1) - 2022-06-20
 
 ### Fixed
diff --git a/nucleus/annotation.py b/nucleus/annotation.py
@@ -939,6 +939,9 @@ def add_annotations(self, annotations: List[Annotation]):
                 ), f"Unexpected annotation type: {type(annotation)}"
                 self.segmentation_annotations.append(annotation)
 
+    def items(self):
+        """Return a view of the ``(attribute name, value)`` pairs stored in
+        this instance's ``__dict__``."""
+        return vars(self).items()
+
     def __len__(self):
         return (
             len(self.box_annotations)
diff --git a/nucleus/metrics/base.py b/nucleus/metrics/base.py
@@ -4,10 +4,13 @@
 from typing import Iterable, List, Optional, Union
 
 from nucleus.annotation import AnnotationList
+from nucleus.metrics.errors import EverythingFilteredError
 from nucleus.metrics.filtering import (
     ListOfAndFilters,
     ListOfOrAndFilters,
-    apply_filters,
+    compose_helpful_filtering_error,
+    filter_annotation_list,
+    filter_prediction_list,
 )
 from nucleus.prediction import PredictionList
 
@@ -133,64 +136,16 @@ def call_metric(
     def __call__(
         self, annotations: AnnotationList, predictions: PredictionList
     ) -> MetricResult:
+        """Filter both inputs, fail loudly if nothing is left, then score.
+
+        The filter helpers return filtered copies and leave the inputs
+        untouched, so the metric has to be computed on those copies.
+
+        Raises:
+            EverythingFilteredError: If the filtered annotations or the
+                filtered predictions are empty.
+        """
-        annotations = self._filter_annotations(annotations)
-        predictions = self._filter_predictions(predictions)
-        return self.call_metric(annotations, predictions)
-
-    def _filter_annotations(self, annotations: AnnotationList):
-        if (
-            self.annotation_filters is None
-            or len(self.annotation_filters) == 0
-        ):
-            return annotations
-        annotations.box_annotations = apply_filters(
-            annotations.box_annotations, self.annotation_filters
-        )
-        annotations.line_annotations = apply_filters(
-            annotations.line_annotations, self.annotation_filters
-        )
-        annotations.polygon_annotations = apply_filters(
-            annotations.polygon_annotations, self.annotation_filters
-        )
-        annotations.cuboid_annotations = apply_filters(
-            annotations.cuboid_annotations, self.annotation_filters
-        )
-        annotations.category_annotations = apply_filters(
-            annotations.category_annotations, self.annotation_filters
-        )
-        annotations.multi_category_annotations = apply_filters(
-            annotations.multi_category_annotations, self.annotation_filters
-        )
-        annotations.segmentation_annotations = apply_filters(
-            annotations.segmentation_annotations, self.annotation_filters
+        filtered_anns = filter_annotation_list(
+            annotations, self.annotation_filters
         )
-        return annotations
-
-    def _filter_predictions(self, predictions: PredictionList):
-        if (
-            self.prediction_filters is None
-            or len(self.prediction_filters) == 0
-        ):
-            return predictions
-        predictions.box_predictions = apply_filters(
-            predictions.box_predictions, self.prediction_filters
+        filtered_preds = filter_prediction_list(
+            predictions, self.prediction_filters
         )
-        predictions.line_predictions = apply_filters(
-            predictions.line_predictions, self.prediction_filters
+        self._raise_if_everything_filtered(
+            annotations, filtered_anns, predictions, filtered_preds
         )
-        predictions.polygon_predictions = apply_filters(
-            predictions.polygon_predictions, self.prediction_filters
-        )
-        predictions.cuboid_predictions = apply_filters(
-            predictions.cuboid_predictions, self.prediction_filters
-        )
-        predictions.category_predictions = apply_filters(
-            predictions.category_predictions, self.prediction_filters
-        )
-        predictions.segmentation_predictions = apply_filters(
-            predictions.segmentation_predictions, self.prediction_filters
-        )
-        return predictions
+        # Score the filtered copies; passing the original inputs here would
+        # silently ignore the configured filters.
+        return self.call_metric(filtered_anns, filtered_preds)
 
     @abstractmethod
     def aggregate_score(self, results: List[MetricResult]) -> ScalarResult:
@@ -215,3 +170,26 @@ def aggregate_score(self, results: List[MetricResult]) -> ScalarResult:
                 return ScalarResult(r2_score)
 
         """
+
+    def _raise_if_everything_filtered(
+        self,
+        annotations: AnnotationList,
+        filtered_annotations: AnnotationList,
+        predictions: PredictionList,
+        filtered_predictions: PredictionList,
+    ):
+        """Raise if filtering left no annotations or no predictions.
+
+        Args:
+            annotations: The annotations before filtering.
+            filtered_annotations: The annotations after filtering.
+            predictions: The predictions before filtering.
+            filtered_predictions: The predictions after filtering.
+
+        Raises:
+            EverythingFilteredError: If either filtered list is empty. The
+                message is built from the unfiltered list and its filters.
+        """
+        checks = (
+            (annotations, filtered_annotations, self.annotation_filters),
+            (predictions, filtered_predictions, self.prediction_filters),
+        )
+        report = []
+        for original, remaining, filters in checks:
+            if len(remaining) == 0:
+                report += compose_helpful_filtering_error(original, filters)
+        if report:
+            raise EverythingFilteredError("\n".join(report))
diff --git a/nucleus/metrics/errors.py b/nucleus/metrics/errors.py
@@ -5,3 +5,7 @@ def __init__(
     ):
         self.message = message
         super().__init__(self.message)
+
+
+class EverythingFilteredError(Exception):
+    """Raised when filtering leaves no annotations or no predictions."""
diff --git a/nucleus/metrics/filtering.py b/nucleus/metrics/filtering.py
@@ -1,3 +1,4 @@
+import copy
 import enum
 import functools
 import logging
@@ -7,13 +8,18 @@
     Iterable,
     List,
     NamedTuple,
+    Optional,
     Sequence,
     Set,
     Tuple,
     Union,
 )
 
+from rich.console import Console
+from rich.table import Table
+
 from nucleus.annotation import (
+    AnnotationList,
     BoxAnnotation,
     CategoryAnnotation,
     CuboidAnnotation,
@@ -29,6 +35,7 @@
     CuboidPrediction,
     LinePrediction,
     PolygonPrediction,
+    PredictionList,
     SegmentationPrediction,
 )
 
@@ -568,3 +575,147 @@ def ensureDNFFilters(filters) -> OrAndDNFFilters:
             formatted_filter.append(and_chain)
         filters = formatted_filter
     return filters
+
+
+def pretty_format_filters_with_or_and(
+    filters: Optional[Union[ListOfOrAndFilters, ListOfAndFilters]]
+):
+    """Render filters as a readable ``... and ... or ...`` expression.
+
+    Each condition is written as a constructor-like call, e.g.
+    ``FieldFilter("key", "op", value)``. Conditions within a branch are
+    joined with ``and`` and the branches with ``or``. A branch is
+    parenthesised only when there are several branches and it holds more
+    than one condition.
+
+    Args:
+        filters: The filters to render, or ``None``.
+
+    Returns:
+        The rendered expression, or ``"No filters applied!"`` when
+        ``filters`` is ``None``.
+
+    Raises:
+        RuntimeError: If a condition has an unrecognised filter type.
+    """
+    if filters is None:
+        return "No filters applied!"
+
+    # Compared with ``==`` in this order, mirroring an if/elif chain.
+    class_names = (
+        (FilterType.FIELD, "FieldFilter"),
+        (FilterType.METADATA, "MetadataFilter"),
+        (FilterType.SEGMENT_FIELD, "SegmentFieldFilter"),
+        (FilterType.SEGMENT_METADATA, "SegmentMetadataFilter"),
+    )
+
+    def format_condition(condition):
+        class_name = next(
+            (name for kind, name in class_names if condition.type == kind),
+            None,
+        )
+        if class_name is None:
+            raise RuntimeError(f"Un-handled filter type: {condition.type}")
+        op = condition.op
+        if isinstance(op, FilterOp):
+            op = op.value
+        value = condition.value
+        if isinstance(value, str):
+            value_formatted = f'"{value}"'
+        else:
+            # Show single-quoted reprs (e.g. lists of strings) double-quoted.
+            value_formatted = f"{value}".replace("'", '"')
+        return f'{class_name}("{condition.key}", "{op}", {value_formatted})'
+
+    or_branches = [
+        [format_condition(condition) for condition in or_branch]
+        for or_branch in ensureDNFFilters(filters)
+    ]
+
+    several_branches = len(or_branches) > 1
+    rendered = []
+    for and_statements in or_branches:
+        joined = " and ".join(and_statements)
+        if several_branches and len(and_statements) > 1:
+            joined = "(" + joined + ")"
+        rendered.append(joined)
+
+    return " or ".join(rendered)
+
+
+def compose_helpful_filtering_error(
+    ann_or_pred_list: Union[AnnotationList, PredictionList], filters
+) -> List[str]:
+    """Build the lines of an "all items filtered out" error report.
+
+    The report names the applied filters and embeds a rendered ``rich``
+    table with the count and labels of every non-empty item type found in
+    the given list.
+
+    Args:
+        ann_or_pred_list: The annotations or predictions to summarise.
+        filters: The filters that were applied to them.
+
+    Returns:
+        The message lines, left for the caller to join.
+    """
+    if isinstance(ann_or_pred_list, AnnotationList):
+        prefix = "Annotations"
+    else:
+        prefix = "Predictions"
+    msg = [
+        f"{prefix}: All items filtered out by:",
+        f" {pretty_format_filters_with_or_and(filters)}",
+        "",
+    ]
+    console = Console()
+    table = Table(
+        "Type",
+        "Count",
+        "Labels",
+        title=f"Original {prefix}",
+        title_justify="left",
+    )
+    segmentation_types = (SegmentationAnnotation, SegmentationPrediction)
+    for type_name, entries in ann_or_pred_list.items():
+        if entries and isinstance(entries[-1], segmentation_types):
+            # Segmentation entries: labels are read from each entry's
+            # ``annotations`` rather than from the entry itself.
+            labels = set()
+            for segmentation in entries:
+                labels |= {s.label for s in segmentation.annotations}
+        else:
+            labels = {entry.label for entry in entries}
+        if entries:
+            table.add_row(type_name, str(len(entries)), str(list(labels)))
+    # Capture the rendered table as text instead of printing it.
+    with console.capture() as capture:
+        console.print(table)
+    msg.append(capture.get())
+    return msg
+
+
+def filter_annotation_list(
+    annotations: AnnotationList, annotation_filters
+) -> AnnotationList:
+    """Return a filtered deep copy of ``annotations``.
+
+    The input is never modified. If ``annotation_filters`` is ``None`` or
+    empty, the unfiltered deep copy is returned.
+
+    Args:
+        annotations: The annotations to filter.
+        annotation_filters: The filters passed to ``apply_filters`` for each
+            annotation type.
+
+    Returns:
+        A new ``AnnotationList`` holding the filtered per-type lists.
+    """
+    filtered = copy.deepcopy(annotations)
+    if annotation_filters is None or len(annotation_filters) == 0:
+        return filtered
+    for field_name in (
+        "box_annotations",
+        "line_annotations",
+        "polygon_annotations",
+        "cuboid_annotations",
+        "category_annotations",
+        "multi_category_annotations",
+        "segmentation_annotations",
+    ):
+        kept = apply_filters(getattr(filtered, field_name), annotation_filters)
+        setattr(filtered, field_name, kept)
+    return filtered
+
+
+def filter_prediction_list(
+    predictions: PredictionList, prediction_filters
+) -> PredictionList:
+    """Return a filtered deep copy of ``predictions``.
+
+    The input is never modified. If ``prediction_filters`` is ``None`` or
+    empty, the unfiltered deep copy is returned.
+
+    Args:
+        predictions: The predictions to filter.
+        prediction_filters: The filters passed to ``apply_filters`` for each
+            prediction type.
+
+    Returns:
+        A new ``PredictionList`` holding the filtered per-type lists.
+    """
+    filtered = copy.deepcopy(predictions)
+    if prediction_filters is None or len(prediction_filters) == 0:
+        return filtered
+    for field_name in (
+        "box_predictions",
+        "line_predictions",
+        "polygon_predictions",
+        "cuboid_predictions",
+        "category_predictions",
+        "segmentation_predictions",
+    ):
+        kept = apply_filters(getattr(filtered, field_name), prediction_filters)
+        setattr(filtered, field_name, kept)
+    return filtered
diff --git a/nucleus/prediction.py b/nucleus/prediction.py
@@ -600,6 +600,9 @@ class PredictionList:
         default_factory=list
     )
 
+    def items(self):
+        """Return a view of the ``(attribute name, value)`` pairs stored in
+        this instance's ``__dict__``."""
+        return vars(self).items()
+
     def add_predictions(self, predictions: List[Prediction]):
         for prediction in predictions:
             if isinstance(prediction, BoxPrediction):
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.14.1"
+version = "0.14.2"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
diff --git a/tests/metrics/test_filtering.py b/tests/metrics/test_filtering.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-fail_fast: true`
	`1`	`+fail_fast: false`
`2`	`2`	`repos:`
`3`	`3`	`- repo: local`
`4`	`4`	`hooks:`
Original file line number	Diff line number	Diff line change
`@@ -600,6 +600,9 @@ class PredictionList:`
`600`	`600`	`default_factory=list`
`601`	`601`	`)`
`602`	`602`
	`603`	`+ def items(self):`
	`604`	`+ return self.__dict__.items()`
	`605`	`+`
`603`	`606`	`def add_predictions(self, predictions: List[Prediction]):`
`604`	`607`	`for prediction in predictions:`
`605`	`608`	`if isinstance(prediction, BoxPrediction):`