slice export working

Diego Ardila · Diego Ardila · commit becb53b419cd · 2021-06-15T16:11:41.000-07:00
diff --git a/nucleus/slice.py b/nucleus/slice.py
@@ -1,10 +1,11 @@
-from typing import Dict, List, Iterable, Set, Tuple, Optional, Union
-from nucleus.dataset_item import DatasetItem
+from typing import Dict, Iterable, List, Set, Tuple, Union
+
+import requests
+
 from nucleus.annotation import Annotation
-from nucleus.utils import format_dataset_item_response
+from nucleus.dataset_item import DatasetItem
 from nucleus.job import AsyncJob
-
-from .constants import DEFAULT_ANNOTATION_UPDATE_MODE
+from nucleus.utils import convert_export_payload, format_dataset_item_response
 
 
 class Slice:
@@ -109,42 +110,12 @@ def items_and_annotations(
             * The other value is a dictionary containing all the annotations for this
                 dataset item, sorted by annotation type.
         """
-        return list(self.items_and_annotation_generator())
-
-    def annotate(
-        self,
-        annotations: List[Annotation],
-        update: Optional[bool] = DEFAULT_ANNOTATION_UPDATE_MODE,
-        batch_size: int = 5000,
-        strict=True,
-    ):
-        """Update annotations within this slice.
-
-        Args:
-            annotations: List of annotations to upload
-            batch_size: How many annotations to send per request.
-            strict: Whether to first check that the annotations belong to this slice.
-                Set to false to avoid this check and speed up upload.
-        """
-        if strict:
-            (
-                annotations_are_in_slice,
-                item_ids_not_found_in_slice,
-                reference_ids_not_found_in_slice,
-            ) = check_annotations_are_in_slice(annotations, self)
-            if not annotations_are_in_slice:
-                message = "Not all annotations are in this slice.\n"
-                if item_ids_not_found_in_slice:
-                    message += f"Item ids not found in slice: {item_ids_not_found_in_slice} \n"
-                if reference_ids_not_found_in_slice:
-                    message += f"Reference ids not found in slice: {reference_ids_not_found_in_slice}"
-                raise ValueError(message)
-        self._client.annotate_dataset(
-            dataset_id=self.dataset_id,
-            annotations=annotations,
-            update=update,
-            batch_size=batch_size,
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.slice_id}/exportForTraining",
+            requests_command=requests.get,
         )
+        return convert_export_payload(api_payload["exportedRows"])
 
     def send_to_labeling(self, project_id: str):
         response = self._client.make_request(
diff --git a/nucleus/utils.py b/nucleus/utils.py
@@ -1,16 +1,29 @@
 """Shared stateless utility function library"""
 
-
+from collections import defaultdict
 import io
 import uuid
 from typing import IO, Dict, List, Sequence, Union
 
 import requests
 from requests.models import HTTPError
 
-from nucleus.annotation import Annotation
-
-from .constants import ANNOTATION_TYPES, ANNOTATIONS_KEY, ITEM_KEY
+from nucleus.annotation import (
+    Annotation,
+    BoxAnnotation,
+    PolygonAnnotation,
+    SegmentationAnnotation,
+)
+
+from .constants import (
+    ANNOTATION_TYPES,
+    ANNOTATIONS_KEY,
+    BOX_TYPE,
+    ITEM_KEY,
+    POLYGON_TYPE,
+    REFERENCE_ID_KEY,
+    SEGMENTATION_TYPE,
+)
 from .dataset_item import DatasetItem
 from .prediction import BoxPrediction, PolygonPrediction
 
@@ -73,6 +86,31 @@ def format_dataset_item_response(response: dict) -> dict:
     }
 
 
+def convert_export_payload(api_payload):  # Turn exported rows into a list of {ITEM_KEY: ..., ANNOTATIONS_KEY: ...} dicts.
+    return_payload = []
+    for row in api_payload:  # every row is indexed by ITEM_KEY, SEGMENTATION_TYPE, POLYGON_TYPE and BOX_TYPE
+        return_payload_row = {}
+        return_payload_row[ITEM_KEY] = DatasetItem.from_json(row[ITEM_KEY])
+        annotations = defaultdict(list)  # annotation types that were never set yield [] on lookup
+        if row[SEGMENTATION_TYPE] is not None:  # the key must be present; None means no segmentation
+            segmentation = row[SEGMENTATION_TYPE]  # same dict object as in the input row, not a copy
+            segmentation[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]  # NOTE: mutates the caller's payload
+            annotations[SEGMENTATION_TYPE] = SegmentationAnnotation.from_json(  # stored as one object, not a list
+                segmentation
+            )
+        for polygon in row[POLYGON_TYPE]:  # the key must be present and iterable
+            polygon[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]  # presumably absent from the export - confirm
+            annotations[POLYGON_TYPE].append(
+                PolygonAnnotation.from_json(polygon)
+            )
+        for box in row[BOX_TYPE]:  # the key must be present and iterable
+            box[REFERENCE_ID_KEY] = row[ITEM_KEY][REFERENCE_ID_KEY]  # item's reference id is copied onto each box
+            annotations[BOX_TYPE].append(BoxAnnotation.from_json(box))
+        return_payload_row[ANNOTATIONS_KEY] = annotations  # a defaultdict keyed by annotation type
+        return_payload.append(return_payload_row)
+    return return_payload  # one entry per input row, in input order
+
+
 def serialize_and_write(
     upload_units: Sequence[Union[DatasetItem, Annotation]], file_pointer
 ):
diff --git a/tests/test_slice.py b/tests/test_slice.py
@@ -1,6 +1,12 @@
+import copy
 import pytest
 from nucleus import Slice, NucleusClient, DatasetItem, BoxAnnotation
-from nucleus.constants import ERROR_PAYLOAD, ITEM_KEY
+from nucleus.constants import (
+    ANNOTATIONS_KEY,
+    BOX_TYPE,
+    ERROR_PAYLOAD,
+    ITEM_KEY,
+)
 from .helpers import (
     TEST_DATASET_NAME,
     TEST_IMG_URLS,
@@ -64,38 +70,41 @@ def test_slice_create_and_delete_and_list(dataset):
         )
 
 
-def test_slice_create_and_annotate(dataset):
+def test_slice_create_and_export(dataset):  # Export a one-item slice and compare it with what was uploaded.
     # Dataset upload
     url = TEST_IMG_URLS[0]
     annotation_in_slice = BoxAnnotation(**TEST_BOX_ANNOTATIONS[0])
-    annotation_not_in_slice = BoxAnnotation(**TEST_BOX_ANNOTATIONS[1])
 
-    ds_items = []
-    ds_items.append(
+    ds_items = [
         DatasetItem(
             image_location=url,
             reference_id=reference_id_from_url(url),
-        )
-    )
+            metadata={"test": "metadata"},
+        ),
+        DatasetItem(  # second item is appended to the dataset but left out of the slice
+            image_location=url,
+            reference_id="different_item",
+            metadata={"test": "metadata"},
+        ),
+    ]
     response = dataset.append(ds_items)
     assert ERROR_PAYLOAD not in response.json()
 
     # Slice creation
     slc = dataset.create_slice(
         name=TEST_SLICE_NAME,
-        reference_ids=[item.reference_id for item in ds_items[:2]],
+        reference_ids=[item.reference_id for item in ds_items[:1]],  # first item only
     )
 
-    slc.annotate(annotations=[annotation_in_slice])
-    with pytest.raises(ValueError) as not_in_slice_error:
-        slc.annotate(annotations=[annotation_not_in_slice])
+    dataset.annotate(annotations=[annotation_in_slice])  # annotate through the dataset, not the slice
 
-    assert (
-        annotation_not_in_slice.reference_id
-        in not_in_slice_error.value.args[0]
-    )
+    expected_box_annotation = copy.deepcopy(annotation_in_slice)  # leave the uploaded annotation untouched
+    expected_box_annotation.annotation_id = None  # NOTE(review): presumably the export omits annotation ids - confirm
+    expected_box_annotation.metadata = {}  # NOTE(review): assumes exported metadata comes back empty - confirm
 
-    slc.annotate(annotations=[annotation_not_in_slice], strict=False)
+    exported = slc.items_and_annotations()  # list of dicts keyed by ITEM_KEY and ANNOTATIONS_KEY
+    assert exported[0][ITEM_KEY] == ds_items[0]
+    assert exported[0][ANNOTATIONS_KEY][BOX_TYPE][0] == expected_box_annotation
 
 
 def test_slice_append(dataset):