Passes dataset test

ardila · ardila · commit d2f3bd9eebed · 2021-04-19T13:06:35.000-07:00
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -1,4 +1,6 @@
 from typing import List, Dict, Any, Optional
+
+from nucleus.utils import format_dataset_item_response
 from .dataset_item import DatasetItem
 from .annotation import (
     Annotation,
@@ -11,10 +13,7 @@
     DATASET_ITEM_IDS_KEY,
     REFERENCE_IDS_KEY,
     NAME_KEY,
-    ITEM_KEY,
     DEFAULT_ANNOTATION_UPDATE_MODE,
-    ANNOTATIONS_KEY,
-    ANNOTATION_TYPES,
 )
 from .payload_constructor import construct_model_run_creation_payload
 
@@ -177,7 +176,7 @@ def iloc(self, i: int) -> dict:
         }
         """
         response = self._client.dataitem_iloc(self.id, i)
-        return self._format_dataset_item_response(response)
+        return format_dataset_item_response(response)
 
     def refloc(self, reference_id: str) -> dict:
         """
@@ -190,7 +189,7 @@ def refloc(self, reference_id: str) -> dict:
         }
         """
         response = self._client.dataitem_ref_id(self.id, reference_id)
-        return self._format_dataset_item_response(response)
+        return format_dataset_item_response(response)
 
     def loc(self, dataset_item_id: str) -> dict:
         """
@@ -203,7 +202,7 @@ def loc(self, dataset_item_id: str) -> dict:
         }
         """
         response = self._client.dataitem_loc(self.id, dataset_item_id)
-        return self._format_dataset_item_response(response)
+        return format_dataset_item_response(response)
 
     def create_slice(
         self,
@@ -245,25 +244,6 @@ def delete_item(self, item_id: str = None, reference_id: str = None):
     def list_autotags(self):
         return self._client.list_autotags(self.id)
 
-    def _format_dataset_item_response(self, response: dict) -> dict:
-        item = response.get(ITEM_KEY, None)
-        annotation_payload = response.get(ANNOTATIONS_KEY, {})
-        if not item or not annotation_payload:
-            # An error occured
-            return response
-
-        annotation_response = {}
-        for annotation_type in ANNOTATION_TYPES:
-            if annotation_type in annotation_payload:
-                annotation_response[annotation_type] = [
-                    Annotation.from_json(ann)
-                    for ann in annotation_payload[annotation_type]
-                ]
-        return {
-            ITEM_KEY: DatasetItem.from_json(item),
-            ANNOTATIONS_KEY: annotation_response,
-        }
-
     def create_custom_index(self, embeddings_url: str):
         return self._client.create_custom_index(self.id, embeddings_url)
 
diff --git a/nucleus/slice.py b/nucleus/slice.py
@@ -1,41 +1,9 @@
-from __future__ import annotations
-
 from typing import List, Iterable, Set, Tuple, Optional
 from nucleus.dataset_item import DatasetItem
 from nucleus.annotation import Annotation
+from nucleus.utils import format_dataset_item_response
 
-from .constants import DEFAULT_ANNOTATION_UPDATE_MODE
-
-
-def check_annotations_are_in_slice(
-    annotations: List[Annotation], slice_to_check: Slice
-) -> Tuple[bool, Set[str], Set[str]]:
-    """Check membership of the annotation targets within this slice.
-
-    annotations: Annnotations with ids referring to targets.
-    slice: The slice to check against.
-    """
-    info = slice_to_check.info()
-    item_ids_not_found_in_slice = {
-        annotation.item_id
-        for annotation in annotations
-        if annotation.item_id is not None
-    }.difference({item_metadata["id"] for item_metadata in info})
-    reference_ids_not_found_in_slice = {
-        annotation.reference_id
-        for annotation in annotations
-        if annotation.reference_id is not None
-    }.difference({item_metadata["reference_id"] for item_metadata in info})
-    if item_ids_not_found_in_slice or reference_ids_not_found_in_slice:
-        annotations_are_in_slice = False
-    else:
-        annotations_are_in_slice = True
-
-    return (
-        annotations_are_in_slice,
-        item_ids_not_found_in_slice,
-        reference_ids_not_found_in_slice,
-    )
+from .constants import DEFAULT_ANNOTATION_UPDATE_MODE, ITEM_KEY
 
 
 class Slice:
@@ -106,13 +74,15 @@ def append(
         return response
 
     def items_generator(self) -> Iterable[DatasetItem]:
-        """Returns an iterable of DatasetItems in this slice."""
+        """Yields the DatasetItem for each item in this slice."""
         info = self.info()
         for item_metadata in info["dataset_items"]:
-            yield self._client.dataitem_loc(
-                dataset_id=info["dataset_id"],
-                dataset_item_id=item_metadata["id"],
-            )
+            yield format_dataset_item_response(
+                self._client.dataitem_loc(
+                    dataset_id=info["dataset_id"],
+                    dataset_item_id=item_metadata["id"],
+                )
+            )[ITEM_KEY]
 
     def items(self) -> List[DatasetItem]:
         """Returns a list of all DatasetItems in this slice."""
@@ -152,3 +122,34 @@ def annotate(
             update=update,
             batch_size=batch_size,
         )
+
+
+def check_annotations_are_in_slice(
+    annotations: List[Annotation], slice_to_check: Slice
+) -> Tuple[bool, Set[str], Set[str]]:
+    """Check membership of the annotation targets within this slice.
+
+    annotations: Annotations with ids referring to targets.
+    slice_to_check: The slice to check against.
+
+    Returns (all targets found, missing item ids, missing reference ids).
+    """
+    # info() is a dict whose "dataset_items" entry lists the per-item
+    # metadata (items_generator reads the same key); iterating the dict
+    # itself would only yield its string keys.
+    slice_items = slice_to_check.info()["dataset_items"]
+    item_ids_not_found_in_slice = {
+        annotation.item_id
+        for annotation in annotations
+        if annotation.item_id is not None
+    }.difference({item["id"] for item in slice_items})
+    reference_ids_not_found_in_slice = {
+        annotation.reference_id
+        for annotation in annotations
+        if annotation.reference_id is not None
+    }.difference({item["reference_id"] for item in slice_items})
+    return (
+        not (item_ids_not_found_in_slice or reference_ids_not_found_in_slice),
+        item_ids_not_found_in_slice,
+        reference_ids_not_found_in_slice,
+    )
diff --git a/nucleus/utils.py b/nucleus/utils.py
@@ -1,8 +1,18 @@
+"""Shared stateless utility function library"""
+
+
 from typing import List, Union, Dict
 
+from nucleus.annotation import Annotation
 from .dataset_item import DatasetItem
 from .prediction import BoxPrediction, PolygonPrediction
 
+from .constants import (
+    ITEM_KEY,
+    ANNOTATIONS_KEY,
+    ANNOTATION_TYPES,
+)
+
 
 def _get_all_field_values(metadata_list: List[dict], key: str):
     return {metadata[key] for metadata in metadata_list if key in metadata}
@@ -34,3 +44,29 @@ def suggest_metadata_schema(
             entry["type"] = "text"
         schema[key] = entry
     return schema
+
+
+def format_dataset_item_response(response: dict) -> dict:
+    """Convert a raw dataset-item response into API objects.
+
+    Raises ValueError if the annotations key or the item key is missing;
+    otherwise returns the parsed item and its annotations grouped by type.
+    """
+    if ANNOTATIONS_KEY not in response:
+        raise ValueError(
+            f"Server response was missing the annotation key: {response}"
+        )
+    if ITEM_KEY not in response:
+        raise ValueError(
+            f"Server response was missing the item key: {response}"
+        )
+    raw_annotations = response[ANNOTATIONS_KEY]
+    parsed_annotations = {
+        kind: [Annotation.from_json(raw) for raw in raw_annotations[kind]]
+        for kind in ANNOTATION_TYPES
+        if kind in raw_annotations
+    }
+    return {
+        ITEM_KEY: DatasetItem.from_json(response[ITEM_KEY]),
+        ANNOTATIONS_KEY: parsed_annotations,
+    }
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -197,8 +197,15 @@ def test_slice_append(dataset):
     all_stored_items = slc.items()
 
     def sort_by_reference_id(items):
+        # Clear the generated item_ids and normalize empty metadata to
+        # None (mutating the items in place) so the two lists can be
+        # compared for equality.
+        for item in items:
+            item.item_id = None
+            if item.metadata == {}:
+                item.metadata = None
         return sorted(items, key=lambda x: x.reference_id)
 
-    breakpoint()
-
-    assert tuple(sort_by_reference_id(all_stored_items)) == ds_items
+    assert sort_by_reference_id(all_stored_items) == sort_by_reference_id(
+        ds_items[:3]
+    )