add method for exporting predictions (#300)

sasha-scale · web-flow · commit 91b874375f0e · 2022-05-19T10:54:25.000-07:00
* add export predictions

* cleanup

* finish test coverage

* address PR comments

* bump version number + describe changes in changelog
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## [0.11.1](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.11.1) - 2022-05-19
+
+### Added
+
+- Exporting model predictions from a slice
+
 ## [0.11.0](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.11.0) - 2022-05-13
 
 ### Added
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
@@ -837,7 +837,6 @@ def make_request(
         Returns:
             Response payload as JSON dict.
         """
-        print(payload, route)
         if payload is None:
             payload = {}
         if requests_command is requests.get:
diff --git a/nucleus/annotation.py b/nucleus/annotation.py
@@ -180,6 +180,37 @@ def to_payload(self) -> dict:
             EMBEDDING_VECTOR_KEY: self.embedding_vector,
         }
 
+    def __eq__(self, other):
+        """Compare this annotation with ``other`` attribute by attribute.
+
+        Two objects are equal when label, x, y, width, height,
+        reference_id, annotation_id, metadata and embedding_vector all
+        compare equal.
+
+        Returns:
+            bool for annotation-like operands; ``NotImplemented`` when
+            ``other`` lacks a compared attribute (e.g. ``None``), so ``==``
+            falls back to Python's default handling instead of raising.
+        """
+        try:
+            return (
+                self.label == other.label
+                and self.x == other.x
+                and self.y == other.y
+                and self.width == other.width
+                and self.height == other.height
+                and self.reference_id == other.reference_id
+                and self.annotation_id == other.annotation_id
+                # Mapping equality ignores insertion order, so no sorting is
+                # needed (sorting also fails on keys of mixed types).
+                and self.metadata == other.metadata
+                and self.embedding_vector == other.embedding_vector
+            )
+        except AttributeError:
+            # ``other`` is not annotation-like: defer to Python's fallback
+            # comparison rather than crashing the ``==`` expression.
+            return NotImplemented
+
 
 @dataclass
 class Point:
diff --git a/nucleus/constants.py b/nucleus/constants.py
@@ -105,6 +105,7 @@
 POINTCLOUD_URL_KEY = "pointcloud_url"
 POSITION_KEY = "position"
 PREDICTIONS_IGNORED_KEY = "predictions_ignored"
+PREDICTIONS_KEY = "predictions"
 PREDICTIONS_PROCESSED_KEY = "predictions_processed"
 REFERENCE_IDS_KEY = "reference_ids"
 REFERENCE_ID_KEY = "reference_id"
diff --git a/nucleus/slice.py b/nucleus/slice.py
@@ -234,6 +234,39 @@ def items_and_annotations(
         )
         return convert_export_payload(api_payload[EXPORTED_ROWS])
 
+    def export_predictions(
+        self, model
+    ) -> List[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
+        """Provides a list of all DatasetItems and Predictions in the Slice for the given Model.
+
+        Parameters:
+            model (Model): the Nucleus model object for which to export predictions.
+
+        Returns:
+            List where each element is a dict containing the DatasetItem
+            and all of its associated Predictions, grouped by type (e.g. box).
+            ::
+
+                List[{
+                    "item": DatasetItem,
+                    "predictions": {
+                        "box": List[BoxAnnotation],
+                        "polygon": List[PolygonAnnotation],
+                        "cuboid": List[CuboidAnnotation],
+                        "segmentation": List[SegmentationAnnotation],
+                        "category": List[CategoryAnnotation],
+                    }
+                }]
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.id}/{model.id}/exportForTraining",
+            requests_command=requests.get,
+        )
+        return convert_export_payload(
+            api_payload[EXPORTED_ROWS], has_predictions=True
+        )
+
     def send_to_labeling(self, project_id: str):
         """Send items in the Slice as tasks to a Scale labeling project.
 
diff --git a/nucleus/utils.py b/nucleus/utils.py
@@ -37,6 +37,7 @@
     PAGE_SIZE,
     PAGE_TOKEN,
     POLYGON_TYPE,
+    PREDICTIONS_KEY,
     REFERENCE_ID_KEY,
     SEGMENTATION_TYPE,
 )
@@ -187,7 +188,7 @@ def format_dataset_item_response(response: dict) -> dict:
     }
 
 
-def convert_export_payload(api_payload):
+def convert_export_payload(api_payload, has_predictions: bool = False):
     """Helper function to convert raw JSON to API objects
 
     Args:
@@ -237,7 +238,9 @@ def convert_export_payload(api_payload):
             annotations[MULTICATEGORY_TYPE].append(
                 MultiCategoryAnnotation.from_json(multicategory)
             )
-        return_payload_row[ANNOTATIONS_KEY] = annotations
+        return_payload_row[
+            ANNOTATIONS_KEY if not has_predictions else PREDICTIONS_KEY
+        ] = annotations
         return_payload.append(return_payload_row)
     return return_payload
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.11.0"
+version = "0.11.1"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
diff --git a/tests/test_slice.py b/tests/test_slice.py
@@ -3,18 +3,38 @@
 import pytest
 import requests
 
-from nucleus import BoxAnnotation, Dataset, NucleusClient, Slice
-from nucleus.constants import ANNOTATIONS_KEY, BOX_TYPE, ITEM_KEY
+from nucleus import BoxAnnotation, BoxPrediction, Dataset, NucleusClient, Slice
+from nucleus.constants import (
+    ANNOTATIONS_KEY,
+    BOX_TYPE,
+    ITEM_KEY,
+    PREDICTIONS_KEY,
+)
 from nucleus.job import AsyncJob
 
 from .helpers import (
     TEST_BOX_ANNOTATIONS,
+    TEST_BOX_PREDICTIONS,
     TEST_PROJECT_ID,
     TEST_SLICE_NAME,
     get_uuid,
 )
 
 
+@pytest.fixture()
+def slc(CLIENT, dataset):
+    """Yield a slice built from at most the first dataset item, then delete it."""
+    first_item_ref_ids = [item.reference_id for item in dataset.items[:1]]
+    created_slice = dataset.create_slice(
+        name=TEST_SLICE_NAME, reference_ids=first_item_ref_ids
+    )
+
+    yield created_slice
+
+    # Teardown: everything after the yield runs once the test is done.
+    CLIENT.delete_slice(created_slice.id)
+
+
 def test_reprs():
     # Have to define here in order to have access to all relevant objects
     def test_repr(test_object: any):
@@ -89,6 +109,44 @@ def get_expected_item(reference_id):
         ] == get_expected_box_annotation(reference_id)
 
 
+def test_slice_create_and_prediction_export(dataset, slc, model):
+    """Upload box predictions, then check the slice export returns them."""
+    ds_items = dataset.items
+
+    predictions = [
+        BoxPrediction(**pred_raw) for pred_raw in TEST_BOX_PREDICTIONS
+    ]
+    response = dataset.upload_predictions(model, predictions)
+
+    assert response
+
+    slice_reference_ids = [item.reference_id for item in slc.items]
+
+    def get_expected_box_prediction(reference_id):
+        for prediction in predictions:
+            if prediction.reference_id == reference_id:
+                return prediction
+        return None
+
+    def get_expected_item(reference_id):
+        if reference_id not in slice_reference_ids:
+            raise ValueError("Got results outside the slice")
+        for item in ds_items:
+            if item.reference_id == reference_id:
+                return item
+        return None
+
+    exported = slc.export_predictions(model)
+    # Avoid a vacuous pass: with no rows the loop below asserts nothing.
+    assert exported, "export_predictions returned no rows"
+    for row in exported:
+        reference_id = row[ITEM_KEY].reference_id
+        assert row[ITEM_KEY] == get_expected_item(reference_id)
+        assert row[PREDICTIONS_KEY][BOX_TYPE][
+            0
+        ] == get_expected_box_prediction(reference_id)
+
+
 def test_slice_append(dataset):
     ds_items = dataset.items