
Commit 5fffaef

Diego Ardila committed

First try :)
1 parent b664115 commit 5fffaef

File tree

2 files changed: +96 -3 lines


nucleus/dataset.py

Lines changed: 17 additions & 2 deletions
@@ -8,14 +8,15 @@
     serialize_and_write_to_presigned_url,
 )
 
-from .annotation import Annotation
+from .annotation import Annotation, check_all_annotation_paths_remote
 from .constants import (
     DATASET_ITEM_IDS_KEY,
     DATASET_LENGTH_KEY,
     DATASET_MODEL_RUNS_KEY,
     DATASET_NAME_KEY,
     DATASET_SLICES_KEY,
     DEFAULT_ANNOTATION_UPDATE_MODE,
+    JOB_ID_KEY,
     NAME_KEY,
     REFERENCE_IDS_KEY,
     REQUEST_ID_KEY,
@@ -143,7 +144,8 @@ def annotate(
         annotations: List[Annotation],
         update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
         batch_size: int = 5000,
-    ) -> dict:
+        asynchronous: bool = False,
+    ) -> Union[dict[str, Any], AsyncJob]:
         """
         Uploads ground truth annotations for a given dataset.
         :param annotations: ground truth annotations for a given dataset to upload
@@ -156,6 +158,19 @@ def annotate(
             "ignored_items": int,
         }
         """
+        if asynchronous:
+            check_all_annotation_paths_remote(annotations)
+
+            request_id = serialize_and_write_to_presigned_url(
+                annotations, self.id, self._client
+            )
+            response = self._client.make_request(
+                payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
+                route=f"dataset/{self.id}/annotate?async=1",
+            )
+
+            return AsyncJob(response[JOB_ID_KEY], self._client)
+
         return self._client.annotate_dataset(
             self.id, annotations, update=update, batch_size=batch_size
         )
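
This commit gives Dataset.annotate an asynchronous path: with asynchronous=True, every annotation must reference a remote asset (enforced by check_all_annotation_paths_remote), the serialized payload goes to a presigned URL, and the dataset/{id}/annotate?async=1 route returns a job id that gets wrapped in an AsyncJob handle. A minimal caller-side sketch of both modes, for illustration only; the API key, dataset id, and annotation values are hypothetical placeholders.

from nucleus import NucleusClient
from nucleus.annotation import BoxAnnotation

client = NucleusClient("YOUR_API_KEY")         # hypothetical key
dataset = client.get_dataset("ds_sample_123")  # hypothetical dataset id

annotations = [
    BoxAnnotation(
        label="car", x=0, y=0, width=10, height=10, reference_id="img_1"
    )
]

# Synchronous path (unchanged): blocks until the upload finishes and
# returns a response dict.
result = dataset.annotate(annotations=annotations, update=False)

# Asynchronous path (new in this commit): returns immediately with an
# AsyncJob handle; annotations pointing at local files are rejected
# up front by check_all_annotation_paths_remote.
job = dataset.annotate(annotations=annotations, asynchronous=True)
job.sleep_until_complete()  # poll until the server marks the job done
print(job.status())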

tests/test_dataset.py

Lines changed: 79 additions & 1 deletion
@@ -1,8 +1,16 @@
-from nucleus.job import JobError
+from nucleus.annotation import (
+    BoxAnnotation,
+    PolygonAnnotation,
+    SegmentationAnnotation,
+)
+from nucleus.job import AsyncJob, JobError
 import pytest
 import os
 
 from .helpers import (
+    TEST_BOX_ANNOTATIONS,
+    TEST_POLYGON_ANNOTATIONS,
+    TEST_SEGMENTATION_ANNOTATIONS,
     TEST_SLICE_NAME,
     TEST_DATASET_NAME,
     TEST_IMG_URLS,
@@ -238,3 +246,73 @@ def test_dataset_export_autotag_scores(CLIENT):
     for column in ["dataset_item_ids", "ref_ids", "scores"]:
         assert column in scores
         assert len(scores[column]) > 0
+
+
+def test_annotate_async(dataset: Dataset):
+    semseg = SegmentationAnnotation.from_json(TEST_SEGMENTATION_ANNOTATIONS[0])
+    polygon = PolygonAnnotation(**TEST_POLYGON_ANNOTATIONS[0])
+    bbox = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])
+    bbox.reference_id = "fake_garbage"
+
+    job: AsyncJob = dataset.annotate(
+        annotations=[semseg, polygon, bbox],
+        asynchronous=True,
+    )
+    job.sleep_until_complete()
+
+    assert job.status() == {
+        "job_id": job.id,
+        "status": "Completed",
+        "message": {
+            "annotation_upload": {
+                "epoch": 1,
+                "total": 2,
+                "errored": 0,
+                "ignored": 0,
+                "datasetId": dataset.id,
+                "processed": 2,
+            },
+            "segmentation_upload": {
+                "errors": [],
+                "ignored": 0,
+                "n_errors": 0,
+                "processed": 1,
+            },
+        },
+    }
+
+
+def test_annotate_async_with_error(dataset: Dataset):
+    semseg = SegmentationAnnotation.from_json(TEST_SEGMENTATION_ANNOTATIONS[0])
+    polygon = PolygonAnnotation(**TEST_POLYGON_ANNOTATIONS[0])
+    bbox = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])
+    bbox.reference_id = "fake_garbage"
+
+    job: AsyncJob = dataset.annotate(
+        annotations=[semseg, polygon, bbox],
+        asynchronous=True,
+    )
+    job.sleep_until_complete()
+
+    assert job.status() == {
+        "job_id": job.id,
+        "status": "Completed",
+        "message": {
+            "annotation_upload": {
+                "epoch": 1,
+                "total": 2,
+                "errored": 0,
+                "ignored": 0,
+                "datasetId": dataset.id,
+                "processed": 1,
+            },
+            "segmentation_upload": {
+                "errors": [],
+                "ignored": 0,
+                "n_errors": 0,
+                "processed": 1,
+            },
+        },
+    }
+
+    assert "Item with id fake_garbage doesn" in str(job.errors())
