nits

sasha-scale · sasha-scale · commit 0f66bc7c553d · 2021-02-22T15:37:00.000-08:00
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
@@ -69,7 +69,11 @@
 
 from .dataset import Dataset
 from .dataset_item import DatasetItem
-from .annotation import BoxAnnotation, PolygonAnnotation, SegmentationAnnotation
+from .annotation import (
+    BoxAnnotation,
+    PolygonAnnotation,
+    SegmentationAnnotation,
+)
 from .prediction import BoxPrediction, PolygonPrediction
 from .model_run import ModelRun
 from .slice import Slice
@@ -79,6 +83,7 @@
     construct_annotation_payload,
     construct_model_creation_payload,
     construct_box_predictions_payload,
+    construct_segmentation_payload,
 )
 from .constants import (
     NUCLEUS_ENDPOINT,
@@ -464,7 +469,9 @@ def exception_handler(request, exception):
     def annotate_dataset(
         self,
         dataset_id: str,
-        annotations: List[Union[BoxAnnotation, PolygonAnnotation, SegmentationAnnotation]],
+        annotations: List[
+            Union[BoxAnnotation, PolygonAnnotation, SegmentationAnnotation]
+        ],
         update: bool,
         batch_size: int = 100,
     ):
@@ -477,8 +484,16 @@ def annotate_dataset(
         """
 
         # Split payload into segmentations and Box/Polygon
-        segmentations = [ann for ann in annotations if isinstance(ann, SegmentationAnnotation)]
-        other_annotations = [ann for ann in annotations if not isinstance(ann, SegmentationAnnotation)]
+        segmentations = [
+            ann
+            for ann in annotations
+            if isinstance(ann, SegmentationAnnotation)
+        ]
+        other_annotations = [
+            ann
+            for ann in annotations
+            if not isinstance(ann, SegmentationAnnotation)
+        ]
 
         batches = [
             other_annotations[i : i + batch_size]
@@ -496,39 +511,42 @@ def annotate_dataset(
             ANNOTATIONS_IGNORED_KEY: 0,
         }
 
-        tqdm_batches = self.tqdm_bar(batches)
+        total_batches = len(batches) + len(semseg_batches)
 
-        for batch in tqdm_batches:
-            payload = construct_annotation_payload(batch, update)
-            response = self._make_request(
-                payload, f"dataset/{dataset_id}/annotate"
-            )
-            if STATUS_CODE_KEY in response:
-                agg_response[ERRORS_KEY] = response
-            else:
-                agg_response[ANNOTATIONS_PROCESSED_KEY] += response[
-                    ANNOTATIONS_PROCESSED_KEY
-                ]
-                agg_response[ANNOTATIONS_IGNORED_KEY] += response[
-                    ANNOTATIONS_IGNORED_KEY
-                ]
+        # One shared bar (pbar, below) tracks box/polygon and semseg batches.
 
-        for s_batch in semseg_batches:
-            payload = {"segmentations": [seg.to_payload() for seg in s_batch]}
-            if update:
-                payload["force"] = update
-            response = self._make_request(
-                payload, f"dataset/{dataset_id}/annotate_segmentation"
-            )
-            if STATUS_CODE_KEY in response:
-                agg_response[ERRORS_KEY] = response
-            else:
-                agg_response[ANNOTATIONS_PROCESSED_KEY] += response[
-                    ANNOTATIONS_PROCESSED_KEY
-                ]
-                agg_response[ANNOTATIONS_IGNORED_KEY] += response[
-                    ANNOTATIONS_IGNORED_KEY
-                ]
+        with self.tqdm_bar(total=total_batches) as pbar:
+            for batch in batches:
+                payload = construct_annotation_payload(batch, update)
+                response = self._make_request(
+                    payload, f"dataset/{dataset_id}/annotate"
+                )
+                pbar.update(1)
+                if STATUS_CODE_KEY in response:
+                    agg_response[ERRORS_KEY] = response
+                else:
+                    agg_response[ANNOTATIONS_PROCESSED_KEY] += response[
+                        ANNOTATIONS_PROCESSED_KEY
+                    ]
+                    agg_response[ANNOTATIONS_IGNORED_KEY] += response[
+                        ANNOTATIONS_IGNORED_KEY
+                    ]
+
+            for s_batch in semseg_batches:
+                payload = construct_segmentation_payload(s_batch, update)
+                response = self._make_request(
+                    payload, f"dataset/{dataset_id}/annotate_segmentation"
+                )
+                pbar.update(1)
+                if STATUS_CODE_KEY in response:
+                    agg_response[ERRORS_KEY] = response
+                else:
+                    agg_response[ANNOTATIONS_PROCESSED_KEY] += response[
+                        ANNOTATIONS_PROCESSED_KEY
+                    ]
+                    agg_response[ANNOTATIONS_IGNORED_KEY] += response[
+                        ANNOTATIONS_IGNORED_KEY
+                    ]
 
         return agg_response
 
@@ -635,7 +653,7 @@ def predict(
 
         for batch in tqdm_batches:
             batch_payload = construct_box_predictions_payload(
-                annotations,
+                batch,
                 update,
             )
             response = self._make_request(
diff --git a/nucleus/annotation.py b/nucleus/annotation.py
@@ -36,8 +36,8 @@ def __str__(self):
     @classmethod
     def from_json(cls, payload: dict):
         return cls(
-            label=payload.get(LABEL_KEY),
-            index=payload.get(INDEX_KEY),
+            label=payload.get(LABEL_KEY, ""),
+            index=payload.get(INDEX_KEY, None),
             metadata=payload.get(METADATA_KEY, None),
         )
 
@@ -75,7 +75,10 @@ def __str__(self):
     def from_json(cls, payload: dict):
         return cls(
             mask_url=payload[MASK_URL_KEY],
-            annotations=[Segment.from_json(ann) for ann in payload.get(ANNOTATIONS_KEY, [])],
+            annotations=[
+                Segment.from_json(ann)
+                for ann in payload.get(ANNOTATIONS_KEY, [])
+            ],
             reference_id=payload.get(REFERENCE_ID_KEY, None),
             item_id=payload.get(ITEM_ID_KEY, None),
         )
@@ -205,4 +208,4 @@ def to_payload(self) -> dict:
         }
 
     def __str__(self):
-        return str(self.to_payload())
+        return str(self.to_payload())
diff --git a/nucleus/constants.py b/nucleus/constants.py
@@ -51,3 +51,4 @@
 AUTOTAGS_KEY = "autotags"
 MASK_URL_KEY = "mask_url"
 INDEX_KEY = "index"
+SEGMENTATIONS_KEY = "segmentations"  # annotate_segmentation payload key
diff --git a/nucleus/payload_constructor.py b/nucleus/payload_constructor.py
@@ -1,6 +1,10 @@
 from typing import List, Optional, Dict, Union
 from .dataset_item import DatasetItem
-from .annotation import BoxAnnotation, PolygonAnnotation
+from .annotation import (
+    BoxAnnotation,
+    PolygonAnnotation,
+    SegmentationAnnotation,
+)
 from .prediction import BoxPrediction, PolygonPrediction
 from .constants import (
     ANNOTATION_UPDATE_KEY,
@@ -11,6 +15,7 @@
     ITEMS_KEY,
     FORCE_KEY,
     MODEL_ID_KEY,
+    SEGMENTATIONS_KEY,
 )
 
 
@@ -39,6 +44,17 @@ def construct_annotation_payload(
     return {ANNOTATIONS_KEY: annotations, ANNOTATION_UPDATE_KEY: update}
 
 
+def construct_segmentation_payload(
+    annotation_items: List[SegmentationAnnotation],
+    update: bool,
+) -> dict:
+    """Return the payload dict for a batch of segmentation annotations."""
+    return {
+        SEGMENTATIONS_KEY: [item.to_payload() for item in annotation_items],
+        "force": update,
+    }
+
+
 def construct_box_predictions_payload(
     box_predictions: List[Union[BoxPrediction, PolygonPrediction]],
     update: bool,
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -2,31 +2,34 @@
 from urllib.parse import urlparse
 import boto3
 
-PRESIGN_EXPIRY_SECONDS = 60*60*24*2 #2 days
+PRESIGN_EXPIRY_SECONDS = 60 * 60 * 24 * 2  # 2 days
 
-TEST_MODEL_NAME = '[PyTest] Test Model'
-TEST_MODEL_REFERENCE = '[PyTest] Test Model Reference'
-TEST_MODEL_RUN = '[PyTest] Test Model Run'
-TEST_DATASET_NAME = '[PyTest] Test Dataset'
-TEST_SLICE_NAME = '[PyTest] Test Slice'
+TEST_MODEL_NAME = "[PyTest] Test Model"
+TEST_MODEL_REFERENCE = "[PyTest] Test Model Reference"
+TEST_MODEL_RUN = "[PyTest] Test Model Run"
+TEST_DATASET_NAME = "[PyTest] Test Dataset"
+TEST_SLICE_NAME = "[PyTest] Test Slice"
 TEST_IMG_URLS = [
-    's3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/6dd63871-831611a6.jpg',
-    's3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/82c1005c-e2d1d94f.jpg',
-    's3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/7f2e1814-6591087d.jpg',
-    's3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/06924f46-1708b96f.jpg',
-    's3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/89b42832-10d662f4.jpg',
+    "s3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/6dd63871-831611a6.jpg",
+    "s3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/82c1005c-e2d1d94f.jpg",
+    "s3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/7f2e1814-6591087d.jpg",
+    "s3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/06924f46-1708b96f.jpg",
+    "s3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/89b42832-10d662f4.jpg",
 ]
 
+
 def get_signed_url(url):
     bucket, key = get_s3_details(url)
     return s3_sign(bucket, key)
 
+
 def get_s3_details(url):
     # Expects S3 URL format to be https://<BUCKET>.s3.amazonaws.com/<KEY>
     parsed = urlparse(url)
-    bucket = parsed.netloc[:parsed.netloc.find(".")]
+    bucket = parsed.netloc[: parsed.netloc.find(".")]
     return bucket, parsed.path[1:]
 
+
 def s3_sign(bucket, key):
     s3 = boto3.client("s3")
     return s3.generate_presigned_url(
@@ -37,35 +40,37 @@ def s3_sign(bucket, key):
         },
         ExpiresIn=PRESIGN_EXPIRY_SECONDS,
     )
-    
+
+
 def reference_id_from_url(url):
     return Path(url).name
 
+
 TEST_BOX_ANNOTATIONS = [
     {
-        'label': f'[Pytest] Box Annotation ${i}',
-        'x': 50 + i * 10,
-        'y': 60 + i * 10,
-        'width': 70 + i * 10,
-        'height': 80 + i * 10,
-        'reference_id': reference_id_from_url(TEST_IMG_URLS[i]),
-        'annotation_id': f'[Pytest] Box Annotation Annotation Id{i}',
+        "label": f"[Pytest] Box Annotation ${i}",
+        "x": 50 + i * 10,
+        "y": 60 + i * 10,
+        "width": 70 + i * 10,
+        "height": 80 + i * 10,
+        "reference_id": reference_id_from_url(TEST_IMG_URLS[i]),
+        "annotation_id": f"[Pytest] Box Annotation Annotation Id{i}",
     }
     for i in range(len(TEST_IMG_URLS))
 ]
 
 TEST_POLYGON_ANNOTATIONS = [
     {
-        'label': f'[Pytest] Polygon Annotation ${i}',
-        'vertices': [
+        "label": f"[Pytest] Polygon Annotation ${i}",
+        "vertices": [
             {
-                'x': 50 + i * 10 + j,
-                'y': 60 + i * 10 + j,
+                "x": 50 + i * 10 + j,
+                "y": 60 + i * 10 + j,
             }
             for j in range(3)
         ],
-        'reference_id': reference_id_from_url(TEST_IMG_URLS[i]),
-        'annotation_id': f'[Pytest] Polygon Annotation Annotation Id{i}',
+        "reference_id": reference_id_from_url(TEST_IMG_URLS[i]),
+        "annotation_id": f"[Pytest] Polygon Annotation Annotation Id{i}",
     }
     for i in range(len(TEST_IMG_URLS))
 ]
@@ -76,52 +81,62 @@ def reference_id_from_url(url):
         "reference_id": reference_id_from_url(TEST_IMG_URLS[i]),
         "mask_url": get_signed_url(TEST_MASK_URL),
         "annotations": [
-            {"label": "bear", "index": 2}, {"label": "grass-merged", "index": 1}
-        ]
+            {"label": "bear", "index": 2},
+            {"label": "grass-merged", "index": 1},
+        ],
     }
     for i in range(len(TEST_IMG_URLS))
 ]
 
 TEST_BOX_PREDICTIONS = [
-    {
-        **TEST_BOX_ANNOTATIONS[i],
-        'confidence': 0.10 * i
-    }
+    {**TEST_BOX_ANNOTATIONS[i], "confidence": 0.10 * i}
     for i in range(len(TEST_BOX_ANNOTATIONS))
 ]
 
 TEST_POLYGON_PREDICTIONS = [
-    {
-        **TEST_POLYGON_ANNOTATIONS[i],
-        'confidence': 0.10 * i
-    }
+    {**TEST_POLYGON_ANNOTATIONS[i], "confidence": 0.10 * i}
     for i in range(len(TEST_POLYGON_ANNOTATIONS))
 ]
 
 
 # Asserts that a box annotation instance matches a dict representing its properties.
 # Useful to check annotation uploads/updates match.
 def assert_box_annotation_matches_dict(annotation_instance, annotation_dict):
-    assert annotation_instance.label == annotation_dict['label']
-    assert annotation_instance.x == annotation_dict['x']
-    assert annotation_instance.y == annotation_dict['y']
-    assert annotation_instance.height == annotation_dict['height']
-    assert annotation_instance.width == annotation_dict['width']
-    assert annotation_instance.annotation_id == annotation_dict['annotation_id']
-
-def assert_polygon_annotation_matches_dict(annotation_instance, annotation_dict):
-    assert annotation_instance.label == annotation_dict['label']
-    assert annotation_instance.annotation_id == annotation_dict['annotation_id']
-    for instance_pt, dict_pt in zip(annotation_instance.vertices, annotation_dict['vertices']):
-        assert instance_pt['x'] == dict_pt['x']
-        assert instance_pt['y'] == dict_pt['y']
+    assert annotation_instance.label == annotation_dict["label"]
+    assert annotation_instance.x == annotation_dict["x"]
+    assert annotation_instance.y == annotation_dict["y"]
+    assert annotation_instance.height == annotation_dict["height"]
+    assert annotation_instance.width == annotation_dict["width"]
+    assert (
+        annotation_instance.annotation_id == annotation_dict["annotation_id"]
+    )
+
+
+def assert_polygon_annotation_matches_dict(
+    annotation_instance, annotation_dict
+):
+    assert annotation_instance.label == annotation_dict["label"]
+    assert (
+        annotation_instance.annotation_id == annotation_dict["annotation_id"]
+    )
+    for instance_pt, dict_pt in zip(
+        annotation_instance.vertices, annotation_dict["vertices"]
+    ):
+        assert instance_pt["x"] == dict_pt["x"]
+        assert instance_pt["y"] == dict_pt["y"]
+
 
 # Asserts that a box prediction instance matches a dict representing its properties.
 # Useful to check prediction uploads/updates match.
 def assert_box_prediction_matches_dict(prediction_instance, prediction_dict):
     assert_box_annotation_matches_dict(prediction_instance, prediction_dict)
-    assert prediction_instance.confidence == prediction_dict['confidence']
+    assert prediction_instance.confidence == prediction_dict["confidence"]
 
-def assert_polygon_prediction_matches_dict(prediction_instance, prediction_dict):
-    assert_polygon_annotation_matches_dict(prediction_instance, prediction_dict)
-    assert prediction_instance.confidence == prediction_dict['confidence']
+
+def assert_polygon_prediction_matches_dict(
+    prediction_instance, prediction_dict
+):
+    assert_polygon_annotation_matches_dict(
+        prediction_instance, prediction_dict
+    )
+    assert prediction_instance.confidence == prediction_dict["confidence"]
diff --git a/tests/test_annotation.py b/tests/test_annotation.py