
Commit 574780e

Matt Sokoloff committed: debug
1 parent 97f6a71 commit 574780e

File tree: 5 files changed (+116 / -14 lines)

Makefile

Lines changed: 9 additions & 0 deletions
@@ -20,3 +20,12 @@ test-prod: build
 		-e LABELBOX_TEST_ENVIRON="prod" \
 		-e LABELBOX_TEST_API_KEY_PROD=${LABELBOX_TEST_API_KEY_PROD} \
 		local/labelbox-python:test pytest $(PATH_TO_TEST) -svvx
+
+
+
+test-dev: build
+	docker run -it -v ${PWD}:/usr/src -w /usr/src \
+		-e LABELBOX_TEST_ENVIRON="staging" \
+		-e LABELBOX_TEST_API_KEY_PROD=${LABELBOX_TEST_API_KEY_PROD} \
+		local/labelbox-python:test pytest $(PATH_TO_TEST) -svv
+
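The new test-dev target mirrors test-prod but sets LABELBOX_TEST_ENVIRON to "staging", reuses LABELBOX_TEST_API_KEY_PROD, and drops pytest's -x flag so the run continues past the first failure. It would be invoked along the lines of "make test-dev PATH_TO_TEST=tests/integration" with LABELBOX_TEST_API_KEY_PROD exported in the calling shell; the test path here is only an illustrative value.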

labelbox/data/annotation_types/metrics/aggregations.py

Lines changed: 2 additions & 0 deletions
@@ -2,7 +2,9 @@
 
 
 class MetricAggregation(Enum):
+    CONFUSION_MATRIX = "CONFUSION_MATRIX"
     ARITHMETIC_MEAN = "ARITHMETIC_MEAN"
     GEOMETRIC_MEAN = "GEOMETRIC_MEAN"
     HARMONIC_MEAN = "HARMONIC_MEAN"
     SUM = "SUM"
+
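For orientation, a small sketch of how the extended enum can be inspected; the assertion below only restates what the diff above defines (Enum iteration follows definition order):

from labelbox.data.annotation_types.metrics.aggregations import MetricAggregation

# CONFUSION_MATRIX is the new member; the ScalarMetric validator added in this
# commit rejects it, so it is effectively reserved for ConfusionMatrixMetric.
assert [a.name for a in MetricAggregation] == [
    "CONFUSION_MATRIX", "ARITHMETIC_MEAN", "GEOMETRIC_MEAN", "HARMONIC_MEAN", "SUM"
]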
Lines changed: 39 additions & 11 deletions
@@ -1,26 +1,54 @@
 from labelbox.data.annotation_types.metrics.aggregations import MetricAggregation
-from typing import Any, Dict, Optional
-from pydantic import BaseModel
+from typing import Any, Dict, Optional, Tuple, Union
+from pydantic import BaseModel, Field, validator, confloat
 
 
-class ScalarMetric(BaseModel):
-    """ Class representing metrics
+ScalarMetricConfidenceValue = Dict[confloat(ge=0, le=1), float]
+ConfusionMatrixMetricConfidenceValue = Dict[confloat(ge=0, le=1), Tuple[int,int,int,int]]
 
-    # For backwards compatibility, metric_name is optional. This will eventually be deprecated
-    # The metric_name will be set to a default name in the editor if it is not set.
 
-    # aggregation will be ignored wihtout providing a metric name.
-    # Not providing a metric name is deprecated.
-    """
-    value: float
+class BaseMetric(BaseModel):
     metric_name: Optional[str] = None
     feature_name: Optional[str] = None
     subclass_name: Optional[str] = None
-    aggregation: MetricAggregation = MetricAggregation.ARITHMETIC_MEAN
     extra: Dict[str, Any] = {}
 
+
+class ScalarMetric(BaseMetric):
+    """ Class representing scalar metrics
+
+    For backwards compatibility, metric_name is optional.
+    The metric_name will be set to a default name in the editor if it is not set.
+    This is not recommended and support for empty metric_name fields will be removed.
+    aggregation will be ignored wihtout providing a metric name.
+    """
+    value: Union[float, ScalarMetricConfidenceValue]
+    aggregation: MetricAggregation = MetricAggregation.ARITHMETIC_MEAN
+
     def dict(self, *args, **kwargs):
         res = super().dict(*args, **kwargs)
         if res['metric_name'] is None:
             res.pop('aggregation')
         return {k: v for k, v in res.items() if v is not None}
+
+    @validator('aggregation')
+    def validate_aggregation(cls, aggregation):
+        if aggregation == MetricAggregation.CONFUSION_MATRIX:
+            raise ValueError("Cannot assign `MetricAggregation.CONFUSION_MATRIX` to `ScalarMetric.aggregation`")
+
+
+
+class ConfusionMatrixMetric(BaseMetric):
+    """ Class representing confusion matrix metrics.
+
+    In the editor, this provides precision, recall, and f-scores.
+    This should be used over multiple scalar metrics so that aggregations are accurate.
+
+    value should be a tuple representing:
+    [True Positive Count, False Positive Count, True Negative Count, False Negative Count]
+
+    aggregation cannot be adjusted for confusion matrix metrics.
+    """
+    value: Union[Tuple[int,int,int,int], ConfusionMatrixMetricConfidenceValue]
+    aggregation: MetricAggregation = Field(MetricAggregation.CONFUSION_MATRIX, const = True)
+
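Taken together, these classes accept both plain values and confidence-keyed values. A minimal usage sketch follows; the import path for ScalarMetric and ConfusionMatrixMetric is an assumption (this diff does not name the file that defines them), and all metric names and numbers are illustrative only:

# Sketch only: the package path below is assumed, not shown in this diff.
from labelbox.data.annotation_types import ScalarMetric, ConfusionMatrixMetric
from labelbox.data.annotation_types.metrics.aggregations import MetricAggregation

# Plain scalar metric; aggregation defaults to ARITHMETIC_MEAN.
iou = ScalarMetric(metric_name="iou", feature_name="cat", value=0.5)

# Scalar metric keyed by confidence threshold (keys must satisfy confloat(ge=0, le=1)).
iou_by_confidence = ScalarMetric(metric_name="iou",
                                 value={0.25: 0.6, 0.5: 0.55, 0.75: 0.4})

# Confusion matrix metric: (True Positive, False Positive, True Negative, False Negative)
# counts, either as a single tuple or keyed by confidence threshold.
cm = ConfusionMatrixMetric(metric_name="iou_confusion", value=(12, 3, 0, 5))
cm_by_confidence = ConfusionMatrixMetric(metric_name="iou_confusion",
                                         value={0.5: (12, 3, 0, 5), 0.75: (9, 2, 0, 8)})

# The new validator rejects the confusion-matrix aggregation on ScalarMetric
# (pydantic surfaces the ValueError as a ValidationError, a ValueError subclass).
try:
    ScalarMetric(metric_name="iou", value=0.5,
                 aggregation=MetricAggregation.CONFUSION_MATRIX)
except ValueError:
    pass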

tests/data/annotation_types/test_metrics.py

Lines changed: 56 additions & 0 deletions
@@ -33,6 +33,62 @@ def test_legacy_scalar_metric():
     next(LabelList([label])).dict() == expected
 
 
+# TODO: Test with confidence
+
+@pytest.mark.parametrize('feature_name,subclass_name,aggregation', [
+    ("cat", "orange", MetricAggregation.ARITHMETIC_MEAN),
+    ("cat", None, MetricAggregation.ARITHMETIC_MEAN),
+    (None, None, MetricAggregation.ARITHMETIC_MEAN),
+    (None, None, None),
+    ("cat", "orange", MetricAggregation.ARITHMETIC_MEAN),
+    ("cat", None, MetricAggregation.HARMONIC_MEAN),
+    (None, None, MetricAggregation.GEOMETRIC_MEAN),
+    (None, None, MetricAggregation.SUM)
+])
+def test_custom_scalar_metric(feature_name, subclass_name, aggregation):
+    value = 0.5
+    kwargs = {'aggregation': aggregation} if aggregation is not None else {}
+    metric = ScalarMetric(metric_name="iou",
+                          value=value,
+                          feature_name=feature_name,
+                          subclass_name=subclass_name,
+                          **kwargs)
+    assert metric.value == value
+
+    label = Label(data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"),
+                  annotations=[metric])
+    expected = {
+        'data': {
+            'external_id': None,
+            'uid': 'ckrmd9q8g000009mg6vej7hzg',
+            'im_bytes': None,
+            'file_path': None,
+            'url': None,
+            'arr': None
+        },
+        'annotations': [{
+            'value':
+                value,
+            'metric_name':
+                'iou',
+            **({
+                'feature_name': feature_name
+            } if feature_name else {}),
+            **({
+                'subclass_name': subclass_name
+            } if subclass_name else {}), 'aggregation':
+                aggregation or MetricAggregation.ARITHMETIC_MEAN,
+            'extra': {}
+        }],
+        'extra': {},
+        'uid': None
+    }
+    assert label.dict() == expected
+    next(LabelList([label])).dict() == expected
+
+
+
+
 @pytest.mark.parametrize('feature_name,subclass_name,aggregation', [
     ("cat", "orange", MetricAggregation.ARITHMETIC_MEAN),
     ("cat", None, MetricAggregation.ARITHMETIC_MEAN),

tests/integration/conftest.py

Lines changed: 10 additions & 3 deletions
@@ -47,19 +47,25 @@ def environ() -> Environ:
 
 
 def graphql_url(environ: str) -> str:
+    return "https://app.replicated-6bd9012.labelbox.dev/api/_gql"
+    """
     if environ == Environ.PROD:
         return 'https://api.labelbox.com/graphql'
     elif environ == Environ.STAGING:
         return 'https://staging-api.labelbox.com/graphql'
     return 'http://host.docker.internal:8080/graphql'
+    """
 
 
 def testing_api_key(environ: str) -> str:
+    return "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja3RhbXFnN2MwMDAxMHljaTAxZDFhaGVqIiwib3JnYW5pemF0aW9uSWQiOiJja3RhbXFnNm4wMDAwMHljaTgwaDI1NXZ3IiwiYXBpS2V5SWQiOiJja3RhbXZmdm4wMDhrMHljaTh1MG40a2hzIiwic2VjcmV0IjoiNzcxOWViMjgyNjUyMWMxODQ5MmJhMjg1NzhmY2FmNDEiLCJpYXQiOjE2MzEwNTMwNDksImV4cCI6MjI2MjIwNTA0OX0.yBLurIRB3xYQkV8MEBm0_LxmdqP9U-8aMj25kASmGLw"
+    """
    if environ == Environ.PROD:
         return os.environ["LABELBOX_TEST_API_KEY_PROD"]
     elif environ == Environ.STAGING:
         return os.environ["LABELBOX_TEST_API_KEY_STAGING"]
     return os.environ["LABELBOX_TEST_API_KEY_LOCAL"]
+    """
 
 
 def cancel_invite(client, invite_id):

@@ -135,9 +141,10 @@ def client(environ: str):
 
 @pytest.fixture(scope="session")
 def image_url(client, environ: str):
-    if environ == Environ.LOCAL:
-        return IMG_URL
-    return client.upload_data(requests.get(IMG_URL).content, sign=True)
+    return IMG_URL
+    #if environ == Environ.LOCAL:
+    #    return IMG_URL
+    #return client.upload_data(requests.get(IMG_URL).content, sign=True)
 
 
 @pytest.fixture
