Add segmentation annotation upload to annotate_dataset; fix SegmentationAnnotation from_json/to_payload; add tests

sasha-scale · sasha-scale · commit 7e7bb422d15e · 2021-02-22T14:37:47.000-08:00
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
@@ -69,7 +69,7 @@
 
 from .dataset import Dataset
 from .dataset_item import DatasetItem
-from .annotation import BoxAnnotation, PolygonAnnotation
+from .annotation import BoxAnnotation, PolygonAnnotation, SegmentationAnnotation
 from .prediction import BoxPrediction, PolygonPrediction
 from .model_run import ModelRun
 from .slice import Slice
@@ -464,7 +464,7 @@ def exception_handler(request, exception):
     def annotate_dataset(
         self,
         dataset_id: str,
-        annotations: List[Union[BoxAnnotation, PolygonAnnotation]],
+        annotations: List[Union[BoxAnnotation, PolygonAnnotation, SegmentationAnnotation]],
         update: bool,
         batch_size: int = 100,
     ):
@@ -476,9 +476,18 @@ def annotate_dataset(
         :return: {"dataset_id: str, "annotations_processed": int}
         """
 
+        # Split payload into segmentations and Box/Polygon
+        segmentations = [ann for ann in annotations if isinstance(ann, SegmentationAnnotation)]
+        other_annotations = [ann for ann in annotations if not isinstance(ann, SegmentationAnnotation)]
+
         batches = [
-            annotations[i : i + batch_size]
-            for i in range(0, len(annotations), batch_size)
+            other_annotations[i : i + batch_size]
+            for i in range(0, len(other_annotations), batch_size)
+        ]
+
+        semseg_batches = [
+            segmentations[i : i + batch_size]
+            for i in range(0, len(segmentations), batch_size)
         ]
 
         agg_response = {
@@ -504,6 +513,23 @@ def annotate_dataset(
                     ANNOTATIONS_IGNORED_KEY
                 ]
 
+        for s_batch in semseg_batches:
+            payload = {"segmentations": [seg.to_payload() for seg in s_batch]}
+            if update:
+                payload["force"] = update
+            response = self._make_request(
+                payload, f"dataset/{dataset_id}/annotate_segmentation"
+            )
+            if STATUS_CODE_KEY in response:
+                agg_response[ERRORS_KEY] = response
+            else:
+                agg_response[ANNOTATIONS_PROCESSED_KEY] += response[
+                    ANNOTATIONS_PROCESSED_KEY
+                ]
+                agg_response[ANNOTATIONS_IGNORED_KEY] += response[
+                    ANNOTATIONS_IGNORED_KEY
+                ]
+
         return agg_response
 
     def ingest_tasks(self, dataset_id: str, payload: dict):
diff --git a/nucleus/annotation.py b/nucleus/annotation.py
@@ -75,15 +75,15 @@ def __str__(self):
     def from_json(cls, payload: dict):
         return cls(
             mask_url=payload[MASK_URL_KEY],
-            annotations=payload[ANNOTATIONS_KEY],
+            annotations=[Segment.from_json(ann) for ann in payload.get(ANNOTATIONS_KEY, [])],
             reference_id=payload.get(REFERENCE_ID_KEY, None),
             item_id=payload.get(ITEM_ID_KEY, None),
         )
 
     def to_payload(self) -> dict:
         payload = {
             MASK_URL_KEY: self.mask_url,
-            ANNOTATIONS_KEY: [ann.to_payload for ann in self.annotations],
+            ANNOTATIONS_KEY: [ann.to_payload() for ann in self.annotations],
         }
         if self.reference_id:
             payload[REFERENCE_ID_KEY] = self.reference_id
diff --git a/nucleus/constants.py b/nucleus/constants.py
@@ -50,4 +50,4 @@
 GEOMETRY_KEY = "geometry"
 AUTOTAGS_KEY = "autotags"
 MASK_URL_KEY = "mask_url"
-INDEX_KEY = "index_key"
+INDEX_KEY = "index"
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -1,4 +1,8 @@
 from pathlib import Path
+from urllib.parse import urlparse
+import boto3
+
+PRESIGN_EXPIRY_SECONDS = 60*60*24*2 #2 days
 
 TEST_MODEL_NAME = '[PyTest] Test Model'
 TEST_MODEL_REFERENCE = '[PyTest] Test Model Reference'
@@ -13,6 +17,27 @@
     's3://scaleapi-attachments/BDD/BDD/bdd100k/images/100k/train/89b42832-10d662f4.jpg',
 ]
 
+def get_signed_url(url):
+    bucket, key = get_s3_details(url)
+    return s3_sign(bucket, key)
+
+def get_s3_details(url):
+    # Expects S3 URL format to be https://<BUCKET>.s3.amazonaws.com/<KEY>
+    parsed = urlparse(url)
+    bucket = parsed.netloc[:parsed.netloc.find(".")]
+    return bucket, parsed.path[1:]
+
+def s3_sign(bucket, key):
+    s3 = boto3.client("s3")
+    return s3.generate_presigned_url(
+        ClientMethod="get_object",
+        Params={
+            "Bucket": bucket,
+            "Key": key,
+        },
+        ExpiresIn=PRESIGN_EXPIRY_SECONDS,
+    )
+    
 def reference_id_from_url(url):
     return Path(url).name
 
@@ -45,6 +70,18 @@ def reference_id_from_url(url):
     for i in range(len(TEST_IMG_URLS))
 ]
 
+TEST_MASK_URL = "https://scale-temp.s3.amazonaws.com/scale-select/nucleus/mscoco_semseg_masks_uint8/000000000285.png"
+TEST_SEGMENTATION_ANNOTATIONS = [
+    {
+        "reference_id": reference_id_from_url(TEST_IMG_URLS[i]),
+        "mask_url": get_signed_url(TEST_MASK_URL),
+        "annotations": [
+            {"label": "bear", "index": 2}, {"label": "grass-merged", "index": 1}
+        ]
+    }
+    for i in range(len(TEST_IMG_URLS))
+]
+
 TEST_BOX_PREDICTIONS = [
     {
         **TEST_BOX_ANNOTATIONS[i],
diff --git a/tests/test_annotation.py b/tests/test_annotation.py
@@ -5,12 +5,13 @@
     TEST_IMG_URLS,
     TEST_BOX_ANNOTATIONS,
     TEST_POLYGON_ANNOTATIONS,
+    TEST_SEGMENTATION_ANNOTATIONS,
     reference_id_from_url,
     assert_box_annotation_matches_dict,
     assert_polygon_annotation_matches_dict,
 )
 
-from nucleus import BoxAnnotation, PolygonAnnotation, DatasetItem
+from nucleus import BoxAnnotation, PolygonAnnotation, SegmentationAnnotation, DatasetItem
 from nucleus.constants import ERROR_PAYLOAD
 
 @pytest.fixture()
@@ -58,6 +59,59 @@ def test_polygon_gt_upload(dataset):
     response_annotation = response[0]
     assert_polygon_annotation_matches_dict(response_annotation, TEST_POLYGON_ANNOTATIONS[0])
 
+def test_single_semseg_gt_upload(dataset):
+    annotation = SegmentationAnnotation.from_json(TEST_SEGMENTATION_ANNOTATIONS[0])
+    response = dataset.annotate(annotations=[annotation])
+    assert response['dataset_id'] == dataset.id
+    assert response['annotations_processed'] == 1
+    assert response['annotations_ignored'] == 0
+    # TODO: also verify the stored segmentation matches TEST_SEGMENTATION_ANNOTATIONS[0]
+
+def test_batch_semseg_gt_upload(dataset):
+    annotations = [SegmentationAnnotation.from_json(ann) for ann in TEST_SEGMENTATION_ANNOTATIONS]
+    response = dataset.annotate(annotations=annotations)
+    assert response['dataset_id'] == dataset.id
+    assert response['annotations_processed'] == 5
+    assert response['annotations_ignored'] == 0
+
+def test_batch_semseg_gt_upload_ignore(dataset):
+    # First upload annotations
+    annotations = [SegmentationAnnotation.from_json(ann) for ann in TEST_SEGMENTATION_ANNOTATIONS]
+    response = dataset.annotate(annotations=annotations)
+    assert response['dataset_id'] == dataset.id
+    assert response['annotations_processed'] == 5
+    assert response['annotations_ignored'] == 0
+
+    #When we re-upload, expect them to be ignored
+    response = dataset.annotate(annotations=annotations)
+    assert response['dataset_id'] == dataset.id
+    assert response['annotations_processed'] == 0
+    assert response['annotations_ignored'] == 5
+
+def test_batch_semseg_gt_upload_update(dataset):
+    # First upload annotations
+    annotations = [SegmentationAnnotation.from_json(ann) for ann in TEST_SEGMENTATION_ANNOTATIONS]
+    response = dataset.annotate(annotations=annotations)
+    assert response['dataset_id'] == dataset.id
+    assert response['annotations_processed'] == 5
+    assert response['annotations_ignored'] == 0
+
+    # When we re-upload with update=True, expect them to be processed again, not ignored
+    response = dataset.annotate(annotations=annotations, update=True)
+    assert response['dataset_id'] == dataset.id
+    assert response['annotations_processed'] == 5
+    assert response['annotations_ignored'] == 0
+
+
+def test_mixed_annotation_upload(dataset):
+    # Upload box and segmentation annotations together in a single call
+    semseg_annotations = [SegmentationAnnotation.from_json(ann) for ann in TEST_SEGMENTATION_ANNOTATIONS]
+    bbox_annotations = [BoxAnnotation(**(ann)) for ann in TEST_BOX_ANNOTATIONS]
+    annotations = bbox_annotations + semseg_annotations
+    response = dataset.annotate(annotations=annotations)
+    assert response['dataset_id'] == dataset.id
+    assert response['annotations_processed'] == 10
+    assert response['annotations_ignored'] == 0
 
 def test_box_gt_upload_update(dataset):
     annotation = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])