Tests working

Diego Ardila · Diego Ardila · commit 7d8037f5aaad · 2021-09-03T09:42:51.000-07:00
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -430,6 +430,24 @@ def items_and_annotations(
         )
         return convert_export_payload(api_payload[EXPORTED_ROWS])
 
+    def export_embeddings(
+        self,
+    ) -> List[Dict[str, Union[str, List[float]]]]:
+        """Returns a pd.DataFrame-ready format of dataset embeddings.
+
+        Returns:
+            A list, where each item is a dict with two keys representing a row
+            in the dataset.
+            * ``reference_id``: the reference id of the row
+            * ``embedding_vector``: a list of the embedding values
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"dataset/{self.id}/embeddings",
+            requests_command=requests.get,
+        )
+        return api_payload
+
     def delete_annotations(
         self, reference_ids: list = None, keep_history=False
     ):
diff --git a/nucleus/slice.py b/nucleus/slice.py
@@ -126,6 +126,24 @@ def send_to_labeling(self, project_id: str):
         )
         return AsyncJob.from_json(response, self._client)
 
+    def export_embeddings(
+        self,
+    ) -> List[Dict[str, Union[str, List[float]]]]:
+        """Returns a pd.DataFrame-ready format of slice embeddings.
+
+        Returns:
+            A list, where each item is a dict with two keys representing a row
+            in the slice.
+            * ``reference_id``: the reference id of the row
+            * ``embedding_vector``: a list of the embedding values
+        """
+        api_payload = self._client.make_request(
+            payload=None,
+            route=f"slice/{self.slice_id}/embeddings",
+            requests_command=requests.get,
+        )
+        return api_payload
+
 
 def check_annotations_are_in_slice(
     annotations: List[Annotation], slice_to_check: Slice
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -13,6 +13,9 @@
 TEST_SLICE_NAME = "[PyTest] Test Slice"
 TEST_PROJECT_ID = "60b699d70f139e002dd31bfc"
 
+DATASET_WITH_AUTOTAG = "ds_c4dgj702e2vjft7m9xa0"
+NUCLEUS_PYTEST_USER_ID = "60ad648c85db770026e9bf77"
+
 
 TEST_IMG_URLS = [
     "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/airplane.jpeg",
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -37,11 +37,11 @@
     TEST_IMG_URLS,
     TEST_POLYGON_ANNOTATIONS,
     TEST_SEGMENTATION_ANNOTATIONS,
+    DATASET_WITH_AUTOTAG,
+    NUCLEUS_PYTEST_USER_ID,
     reference_id_from_url,
 )
 
-TEST_AUTOTAG_DATASET = "ds_bz43jm2jwm70060b3890"
-
 
 def test_reprs():
     # Have to define here in order to have access to all relevant objects
@@ -325,17 +325,17 @@ def test_raises_error_for_duplicate():
 def test_dataset_export_autotag_scores(CLIENT):
     # This test can only run for the test user who has an indexed dataset.
     # TODO: if/when we can create autotags via api, create one instead.
-    if os.environ.get("HAS_ACCESS_TO_TEST_DATA", False):
-        dataset = CLIENT.get_dataset(TEST_AUTOTAG_DATASET)
+    if NUCLEUS_PYTEST_USER_ID in CLIENT.api_key:
+        dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
 
         with pytest.raises(NucleusAPIError) as api_error:
             dataset.autotag_scores(autotag_name="NONSENSE_GARBAGE")
         assert (
-            f"The autotag NONSENSE_GARBAGE was not found in dataset {TEST_AUTOTAG_DATASET}"
+            f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
             in str(api_error.value)
         )
 
-        scores = dataset.autotag_scores(autotag_name="TestTag")
+        scores = dataset.autotag_scores(autotag_name="PytestTestTag")
 
         for column in ["dataset_item_ids", "ref_ids", "scores"]:
             assert column in scores
@@ -484,3 +484,10 @@ def sort_labelmap(segmentation_annotation):
     assert exported[0][ANNOTATIONS_KEY][POLYGON_TYPE][0] == clear_fields(
         polygon_annotation
     )
+
+
+def test_export_embeddings(CLIENT):
+    if NUCLEUS_PYTEST_USER_ID in CLIENT.api_key:  # no-op for other API keys
+        embeddings = Dataset(DATASET_WITH_AUTOTAG, CLIENT).export_embeddings()
+        assert "embedding_vector" in embeddings[0]
+        assert "reference_id" in embeddings[0]
diff --git a/tests/test_slice.py b/tests/test_slice.py
@@ -1,7 +1,7 @@
 import copy
 import pytest
 import uuid
-from nucleus import Slice, NucleusClient, DatasetItem, BoxAnnotation
+from nucleus import Slice, NucleusClient, DatasetItem, BoxAnnotation, Dataset
 from nucleus.constants import (
     ANNOTATIONS_KEY,
     BOX_TYPE,
@@ -14,6 +14,8 @@
     TEST_SLICE_NAME,
     TEST_BOX_ANNOTATIONS,
     TEST_PROJECT_ID,
+    DATASET_WITH_AUTOTAG,
+    NUCLEUS_PYTEST_USER_ID,
     reference_id_from_url,
 )
 from nucleus.job import AsyncJob
@@ -181,3 +183,11 @@ def test_slice_send_to_labeling(dataset):
 
     response = slc.send_to_labeling(TEST_PROJECT_ID)
     assert isinstance(response, AsyncJob)
+
+
+def test_export_slice_embeddings(CLIENT):
+    if NUCLEUS_PYTEST_USER_ID in CLIENT.api_key:  # no-op for other API keys
+        test_slice = CLIENT.get_slice("slc_c4s4ts3v7bw00b1hkj0g")
+        embeddings = test_slice.export_embeddings()
+        assert "embedding_vector" in embeddings[0]
+        assert "reference_id" in embeddings[0]