dataset.get_{image|object}_indexing_status (#230)

jihan-yin · web-flow · commit 879a2dbe08f0 · 2022-05-14T16:59:37.000-07:00
Gets the indexing status of your primary image or object index: the total number of items and the number of items indexed.
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
@@ -841,6 +841,10 @@ def make_request(
         if payload is None:
             payload = {}
         if requests_command is requests.get:
+            if payload:
+                print(
+                    "Received defined payload with GET request! Will ignore payload"
+                )
             payload = None
         return self._connection.make_request(payload, route, requests_command)  # type: ignore
 
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -1007,6 +1007,45 @@ def set_continuous_indexing(self, enable: bool = True):
 
         return response
 
+    def get_image_indexing_status(self):
+        """Fetches indexing progress for this dataset's primary image index.
+
+        Returns:
+            Response payload::
+
+                {
+                    "embedding_count": int
+                    "image_count": int
+                    "percent_indexed": float
+                    "additional_context": str
+                }
+        """
+        payload = {"image": True}
+        route = f"dataset/{self.id}/indexingStatus"
+        return self._client.make_request(
+            payload, route, requests_command=requests.post
+        )
+
+    def get_object_indexing_status(self, model_run_id=None):
+        """Fetches indexing progress for this dataset's primary object index.
+        When model_run_id is omitted (None), the progress of the ground truth objects is retrieved.
+
+        Returns:
+            Response payload::
+
+                {
+                    "embedding_count": int
+                    "object_count": int
+                    "percent_indexed": float
+                    "additional_context": str
+                }
+        """
+        payload = {"image": False, "model_run_id": model_run_id}
+        route = f"dataset/{self.id}/indexingStatus"
+        return self._client.make_request(
+            payload, route, requests_command=requests.post
+        )
+
     def create_image_index(self):
         """Creates or updates image index by generating embeddings for images that do not already have embeddings.
 
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -16,7 +16,7 @@
 TEST_SLICE_NAME = "[PyTest] Test Slice"
 TEST_PROJECT_ID = "60b699d70f139e002dd31bfc"
 
-DATASET_WITH_AUTOTAG = "ds_c8jwdhy4y4f0078hzceg"
+DATASET_WITH_EMBEDDINGS = "ds_c8jwdhy4y4f0078hzceg"
 NUCLEUS_PYTEST_USER_ID = "60ad648c85db770026e9bf77"
 
 EVAL_FUNCTION_THRESHOLD = 0.5
diff --git a/tests/test_autotag.py b/tests/test_autotag.py
@@ -4,15 +4,18 @@
 
 from nucleus.dataset import Dataset
 from nucleus.errors import NucleusAPIError
-from tests.helpers import DATASET_WITH_AUTOTAG, running_as_nucleus_pytest_user
+from tests.helpers import (
+    DATASET_WITH_EMBEDDINGS,
+    running_as_nucleus_pytest_user,
+)
 
 # TODO: Test delete_autotag once API support for autotag creation is added.
 
 
 @pytest.mark.integration
 def test_update_autotag(CLIENT):
     if running_as_nucleus_pytest_user(CLIENT):
-        job = Dataset(DATASET_WITH_AUTOTAG, CLIENT).update_autotag(
+        job = Dataset(DATASET_WITH_EMBEDDINGS, CLIENT).update_autotag(
             "tag_c8jwr0rpy1w00e134an0"
         )
         job.sleep_until_complete()
@@ -24,12 +27,12 @@ def test_dataset_export_autotag_training_items(CLIENT):
     # This test can only run for the test user who has an indexed dataset.
     # TODO: if/when we can create autotags via api, create one instead.
     if running_as_nucleus_pytest_user(CLIENT):
-        dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
+        dataset = CLIENT.get_dataset(DATASET_WITH_EMBEDDINGS)
 
         with pytest.raises(NucleusAPIError) as api_error:
             dataset.autotag_training_items(autotag_name="NONSENSE_GARBAGE")
         assert (
-            f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
+            f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_EMBEDDINGS}"
             in str(api_error.value)
         )
 
@@ -52,7 +55,9 @@ def test_dataset_export_autotag_training_items(CLIENT):
 
 def test_export_embeddings(CLIENT):
     if running_as_nucleus_pytest_user(CLIENT):
-        embeddings = Dataset(DATASET_WITH_AUTOTAG, CLIENT).export_embeddings()
+        embeddings = Dataset(
+            DATASET_WITH_EMBEDDINGS, CLIENT
+        ).export_embeddings()
         assert "embedding_vector" in embeddings[0]
         assert "reference_id" in embeddings[0]
 
@@ -61,12 +66,12 @@ def test_dataset_export_autotag_tagged_items(CLIENT):
     # This test can only run for the test user who has an indexed dataset.
     # TODO: if/when we can create autotags via api, create one instead.
     if running_as_nucleus_pytest_user(CLIENT):
-        dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
+        dataset = CLIENT.get_dataset(DATASET_WITH_EMBEDDINGS)
 
         with pytest.raises(NucleusAPIError) as api_error:
             dataset.autotag_items(autotag_name="NONSENSE_GARBAGE")
         assert (
-            f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
+            f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_EMBEDDINGS}"
             in str(api_error.value)
         )
 
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -29,6 +29,7 @@
 from nucleus.job import AsyncJob, JobError
 
 from .helpers import (
+    DATASET_WITH_EMBEDDINGS,
     LOCAL_FILENAME,
     TEST_BOX_ANNOTATIONS,
     TEST_CATEGORY_ANNOTATIONS,
@@ -556,3 +557,28 @@ def test_dataset_item_iterator(dataset):
     }
     for key in expected_items:
         assert actual_items[key] == expected_items[key]
+
+
+@pytest.mark.integration
+def test_dataset_get_image_indexing_status(CLIENT):
+    """Image index status reports both counts and the indexed/total ratio."""
+    dataset = Dataset(DATASET_WITH_EMBEDDINGS, CLIENT)
+    resp = dataset.get_image_indexing_status()
+    assert resp["embedding_count"] == 170
+    assert resp["image_count"] == 170
+    assert "object_count" not in resp
+    assert round(resp["percent_indexed"], 2) == round(
+        resp["embedding_count"] / resp["image_count"], 2  # indexed / total
+    )
+
+
+@pytest.mark.integration
+def test_dataset_get_object_indexing_status(CLIENT):
+    dataset = Dataset(DATASET_WITH_EMBEDDINGS, CLIENT)
+    resp = dataset.get_object_indexing_status()
+    assert resp["embedding_count"] == 422
+    assert resp["object_count"] == 423
+    assert "image_count" not in resp
+    assert round(resp["percent_indexed"], 2) == round(
+        resp["embedding_count"] / resp["object_count"], 2  # indexed / total
+    )