Skip to content

Commit ab868f0

Browse files
ardila and Ubuntu authored
API for updating autotag (#161)
* tests cleaned up and still pass * more docs * fix tests not running in circleci * fix broken test by creating slice Co-authored-by: Ubuntu <diego.ardila@scale.com>
1 parent b9d87c3 commit ab868f0

File tree

5 files changed

+125
-79
lines changed

5 files changed

+125
-79
lines changed

nucleus/dataset.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,28 @@ def list_autotags(self):
700700
"""
701701
return self._client.list_autotags(self.id)
702702

703+
def update_autotag(self, autotag_id: str):
    """Rerun autotag inference on all dataset items in the dataset.

    For now this endpoint does not try to skip items that have already been
    inferenced, but this improvement is planned for the future. This means
    that for now, you can only have one job running at a time, so please
    await the result using ``job.sleep_until_complete()`` before launching
    another job.

    Parameters:
        autotag_id: ID of the autotag to re-run inference for. You can find
            the ID you want by using ``dataset.list_autotags``, or by looking
            at the URL in the manage autotag page.

    Returns:
        :class:`AsyncJob`: Asynchronous job object to track processing status.
    """
    # POST autotag/<id> starts the server-side inference job; the response
    # payload describes the async job to poll.
    return AsyncJob.from_json(
        payload=self._client.make_request(
            {}, f"autotag/{autotag_id}", requests.post
        ),
        client=self._client,
    )
724+
703725
def create_custom_index(
704726
self, embeddings_urls: List[str], embedding_dim: int
705727
):

tests/helpers.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import time
1+
import os
22
from pathlib import Path
3-
from urllib.parse import urlparse
43

54
from nucleus import BoxPrediction, DatasetItem
65

@@ -402,3 +401,11 @@ def assert_category_prediction_matches_dict(
402401
prediction_instance, prediction_dict
403402
)
404403
assert prediction_instance.confidence == prediction_dict["confidence"]
404+
405+
406+
def running_as_nucleus_pytest_user(client):
    """Return True when the test session runs as the Nucleus pytest user.

    Detection is via either the client's API key containing the pytest user
    id, or the NUCLEUS_PYTEST_USER_ID environment variable matching it.
    """
    return (
        NUCLEUS_PYTEST_USER_ID in client.api_key
        or os.environ.get("NUCLEUS_PYTEST_USER_ID") == NUCLEUS_PYTEST_USER_ID
    )

tests/test_autotag.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,95 @@
1+
import os
2+
3+
import pytest
4+
5+
from nucleus.dataset import Dataset
6+
from nucleus.errors import NucleusAPIError
7+
from tests.helpers import DATASET_WITH_AUTOTAG, running_as_nucleus_pytest_user
8+
19
# TODO: Test delete_autotag once API support for autotag creation is added.
10+
11+
12+
@pytest.mark.integration
def test_update_autotag(CLIENT):
    """Re-run autotag inference end-to-end and wait for job completion.

    Only runs for the Nucleus pytest user, who owns the indexed dataset.
    """
    if not running_as_nucleus_pytest_user(CLIENT):
        return
    dataset = Dataset(DATASET_WITH_AUTOTAG, CLIENT)
    job = dataset.update_autotag("tag_c5jwvzzde8c00604mkx0")
    job.sleep_until_complete()
    assert job.status()["status"] == "Completed"
21+
22+
23+
def test_dataset_export_autotag_training_items(CLIENT):
    """Export positive training items for an existing autotag and check shape."""
    # This test can only run for the test user who has an indexed dataset.
    # TODO: if/when we can create autotags via api, create one instead.
    if not running_as_nucleus_pytest_user(CLIENT):
        return
    dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)

    # Unknown autotag names must surface a clear API error.
    with pytest.raises(NucleusAPIError) as api_error:
        dataset.autotag_training_items(autotag_name="NONSENSE_GARBAGE")
    assert (
        f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
        in str(api_error.value)
    )

    response = dataset.autotag_training_items(autotag_name="PytestTestTag")

    assert "autotagPositiveTrainingItems" in response
    assert "autotag" in response

    training_items = response["autotagPositiveTrainingItems"]
    tag = response["autotag"]

    assert len(training_items) > 0
    for training_item in training_items:
        assert "ref_id" in training_item

    for field in ("id", "name", "status", "autotag_level"):
        assert field in tag
51+
52+
53+
def test_export_embeddings(CLIENT):
    """Dataset embedding export exposes a vector and a reference id per row."""
    if not running_as_nucleus_pytest_user(CLIENT):
        return
    rows = Dataset(DATASET_WITH_AUTOTAG, CLIENT).export_embeddings()
    first = rows[0]
    assert "embedding_vector" in first
    assert "reference_id" in first
58+
59+
60+
def test_dataset_export_autotag_tagged_items(CLIENT):
    """Export tagged items for an existing autotag and sanity-check the payload."""
    # This test can only run for the test user who has an indexed dataset.
    # TODO: if/when we can create autotags via api, create one instead.
    if not running_as_nucleus_pytest_user(CLIENT):
        return
    dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)

    # Unknown autotag names must surface a clear API error.
    with pytest.raises(NucleusAPIError) as api_error:
        dataset.autotag_items(autotag_name="NONSENSE_GARBAGE")
    assert (
        f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
        in str(api_error.value)
    )

    response = dataset.autotag_items(autotag_name="PytestTestTag")

    assert "autotagItems" in response
    assert "autotag" in response

    tagged_items = response["autotagItems"]
    tag = response["autotag"]

    assert len(tagged_items) > 0
    for tagged_item in tagged_items:
        for field in ("ref_id", "score"):
            assert field in tagged_item

    for field in ("id", "name", "status", "autotag_level"):
        assert field in tag
88+
89+
90+
def test_export_slice_embeddings(CLIENT):
    """Slice-level embedding export has the same row shape as dataset export."""
    if not running_as_nucleus_pytest_user(CLIENT):
        return
    pytest_slice = CLIENT.get_slice("slc_c6kcx5mrzr7g0c9d8cng")
    rows = pytest_slice.export_embeddings()
    first = rows[0]
    assert "embedding_vector" in first
    assert "reference_id" in first

tests/test_dataset.py

Lines changed: 0 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@
3838
from nucleus.prediction import BoxPrediction
3939

4040
from .helpers import (
41-
DATASET_WITH_AUTOTAG,
4241
LOCAL_FILENAME,
43-
NUCLEUS_PYTEST_USER_ID,
4442
TEST_BOX_ANNOTATIONS,
4543
TEST_CATEGORY_ANNOTATIONS,
4644
TEST_DATASET_NAME,
@@ -354,66 +352,6 @@ def test_raises_error_for_duplicate():
354352
)
355353

356354

357-
def test_dataset_export_autotag_tagged_items(CLIENT):
358-
# This test can only run for the test user who has an indexed dataset.
359-
# TODO: if/when we can create autotags via api, create one instead.
360-
if os.environ.get("NUCLEUS_PYTEST_USER_ID") == NUCLEUS_PYTEST_USER_ID:
361-
dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
362-
363-
with pytest.raises(NucleusAPIError) as api_error:
364-
dataset.autotag_items(autotag_name="NONSENSE_GARBAGE")
365-
assert (
366-
f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
367-
in str(api_error.value)
368-
)
369-
370-
items = dataset.autotag_items(autotag_name="PytestTestTag")
371-
372-
assert "autotagItems" in items
373-
assert "autotag" in items
374-
375-
autotagItems = items["autotagItems"]
376-
autotag = items["autotag"]
377-
378-
assert len(autotagItems) > 0
379-
for item in autotagItems:
380-
for column in ["ref_id", "score"]:
381-
assert column in item
382-
383-
for column in ["id", "name", "status", "autotag_level"]:
384-
assert column in autotag
385-
386-
387-
def test_dataset_export_autotag_training_items(CLIENT):
388-
# This test can only run for the test user who has an indexed dataset.
389-
# TODO: if/when we can create autotags via api, create one instead.
390-
if os.environ.get("NUCLEUS_PYTEST_USER_ID") == NUCLEUS_PYTEST_USER_ID:
391-
dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
392-
393-
with pytest.raises(NucleusAPIError) as api_error:
394-
dataset.autotag_training_items(autotag_name="NONSENSE_GARBAGE")
395-
assert (
396-
f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
397-
in str(api_error.value)
398-
)
399-
400-
items = dataset.autotag_training_items(autotag_name="PytestTestTag")
401-
402-
assert "autotagPositiveTrainingItems" in items
403-
assert "autotag" in items
404-
405-
autotagTrainingItems = items["autotagPositiveTrainingItems"]
406-
autotag = items["autotag"]
407-
408-
assert len(autotagTrainingItems) > 0
409-
for item in autotagTrainingItems:
410-
for column in ["ref_id"]:
411-
assert column in item
412-
413-
for column in ["id", "name", "status", "autotag_level"]:
414-
assert column in autotag
415-
416-
417355
@pytest.mark.integration
418356
def test_annotate_async(dataset: Dataset):
419357
dataset.append(make_dataset_items())
@@ -578,10 +516,3 @@ def sort_labelmap(segmentation_annotation):
578516
exported[0][ANNOTATIONS_KEY][MULTICATEGORY_TYPE][0]
579517
== multicategory_annotation
580518
)
581-
582-
583-
def test_export_embeddings(CLIENT):
584-
if NUCLEUS_PYTEST_USER_ID in CLIENT.api_key:
585-
embeddings = Dataset(DATASET_WITH_AUTOTAG, CLIENT).export_embeddings()
586-
assert "embedding_vector" in embeddings[0]
587-
assert "reference_id" in embeddings[0]

tests/test_slice.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,3 @@ def test_slice_send_to_labeling(dataset):
184184

185185
response = slc.send_to_labeling(TEST_PROJECT_ID)
186186
assert isinstance(response, AsyncJob)
187-
188-
189-
def test_export_slice_embeddings(CLIENT):
190-
test_slice = CLIENT.get_slice("slc_c4s4ts3v7bw00b1hkj0g")
191-
if NUCLEUS_PYTEST_USER_ID in CLIENT.api_key:
192-
embeddings = test_slice.export_embeddings()
193-
assert "embedding_vector" in embeddings[0]
194-
assert "reference_id" in embeddings[0]

0 commit comments

Comments (0)