Improve Categorization API Error Messages (#182)

cmpajot · web-flow · commit 8293cd5e7b36 · 2021-12-28T18:31:46.000-07:00
* Passing sync tests

* Added async integration tests

* Updated error messages
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
@@ -468,6 +468,7 @@ def annotate_dataset(
             DATASET_ID_KEY: dataset_id,
             ANNOTATIONS_PROCESSED_KEY: 0,
             ANNOTATIONS_IGNORED_KEY: 0,
+            ERRORS_KEY: [],
         }
 
         total_batches = len(batches) + len(semseg_batches)
@@ -490,6 +491,7 @@ def annotate_dataset(
                     agg_response[ANNOTATIONS_IGNORED_KEY] += response[
                         ANNOTATIONS_IGNORED_KEY
                     ]
+                    agg_response[ERRORS_KEY] += response[ERRORS_KEY]
 
             for s_batch in semseg_batches:
                 payload = construct_segmentation_payload(s_batch, update)
@@ -628,6 +630,8 @@ def predict(
             else:
                 predictions_processed += response[PREDICTIONS_PROCESSED_KEY]
                 predictions_ignored += response[PREDICTIONS_IGNORED_KEY]
+                if ERRORS_KEY in response:
+                    errors += response[ERRORS_KEY]
 
         for s_batch in s_batches:
             payload = construct_segmentation_payload(s_batch, update)
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -180,6 +180,14 @@ def reference_id_from_url(url):
     for i in range(len(TEST_IMG_URLS))
 ]
 
+TEST_NONEXISTENT_TAXONOMY_CATEGORY_ANNOTATION = [
+    {
+        "label": "[Pytest] Category Label 0",
+        "reference_id": reference_id_from_url(TEST_IMG_URLS[0]),
+        "taxonomy_name": "[Pytest] Category Taxonomy Nonexistent",  # upload tests expect this taxonomy to be reported as nonexistent
+    }
+]
+
 TEST_MULTICATEGORY_ANNOTATIONS = [
     {
         "labels": [
@@ -290,6 +298,15 @@ def reference_id_from_url(url):
     for i in range(len(TEST_DEFAULT_CATEGORY_ANNOTATIONS))
 ]
 
+TEST_NONEXISTENT_TAXONOMY_CATEGORY_PREDICTION = [
+    {
+        "label": "[Pytest] Category Label 0",
+        "reference_id": reference_id_from_url(TEST_IMG_URLS[0]),
+        "taxonomy_name": "[Pytest] Category Taxonomy Nonexistent",  # upload tests expect this taxonomy to be reported as nonexistent
+        "confidence": 0.10,
+    }
+]
+
 TEST_INDEX_EMBEDDINGS_FILE = "https://raw.githubusercontent.com/scaleapi/nucleus-python-client/master/tests/testdata/pytest_embeddings_payload.json"
 
 
diff --git a/tests/test_annotation.py b/tests/test_annotation.py
@@ -11,7 +11,7 @@
     SegmentationAnnotation,
 )
 from nucleus.constants import ERROR_PAYLOAD
-from nucleus.job import AsyncJob
+from nucleus.job import AsyncJob, JobError
 
 from .helpers import (
     TEST_BOX_ANNOTATIONS,
@@ -21,6 +21,7 @@
     TEST_DEFAULT_MULTICATEGORY_ANNOTATIONS,
     TEST_IMG_URLS,
     TEST_MULTICATEGORY_ANNOTATIONS,
+    TEST_NONEXISTENT_TAXONOMY_CATEGORY_ANNOTATION,
     TEST_POLYGON_ANNOTATIONS,
     TEST_SEGMENTATION_ANNOTATIONS,
     assert_box_annotation_matches_dict,
@@ -162,6 +163,21 @@ def test_default_category_gt_upload(dataset):
     )
 
 
+def test_non_existent_taxonomy_category_gt_upload(dataset):
+    """Sync annotate with an unknown taxonomy reports a validation error."""
+    payload = TEST_NONEXISTENT_TAXONOMY_CATEGORY_ANNOTATION[0]
+    response = dataset.annotate(
+        annotations=[CategoryAnnotation.from_json(payload)]
+    )
+
+    # Nothing is counted as processed or ignored; the failure is in "errors".
+    assert response["dataset_id"] == dataset.id
+    assert response["annotations_processed"] == 0
+    assert response["annotations_ignored"] == 0
+    expected = f'Input validation failed: Taxonomy {payload["taxonomy_name"]} does not exist in dataset {dataset.id}'
+    assert expected in response["errors"][0]
+
+
 def test_multicategory_gt_upload(dataset):
     annotation = MultiCategoryAnnotation.from_json(
         TEST_MULTICATEGORY_ANNOTATIONS[0]
@@ -745,3 +761,35 @@ def test_default_category_gt_upload_async(dataset):
         "completed_steps": 1,
         "total_steps": 1,
     }
+
+
+@pytest.mark.integration
+def test_non_existent_taxonomy_category_gt_upload_async(dataset):
+    """Async annotate with an unknown taxonomy must error and report why."""
+    annotation = CategoryAnnotation.from_json(
+        TEST_NONEXISTENT_TAXONOMY_CATEGORY_ANNOTATION[0]
+    )
+    job: AsyncJob = dataset.annotate(
+        annotations=[annotation], asynchronous=True
+    )
+
+    # pytest.raises fails the test if no JobError surfaces, instead of
+    # silently skipping the error-message check as a bare try/except would.
+    with pytest.raises(JobError):
+        job.sleep_until_complete()
+
+    assert (
+        f'Input validation failed: Taxonomy {TEST_NONEXISTENT_TAXONOMY_CATEGORY_ANNOTATION[0]["taxonomy_name"]} does not exist in dataset {dataset.id}'
+        in job.errors()[-1]
+    )
+
+    assert job.status() == {
+        "job_id": job.job_id,
+        "status": "Errored",
+        "message": {
+            "status_log": "No additional information can be provided at this time."
+        },
+        "job_progress": "0.00",
+        "completed_steps": 0,
+        "total_steps": 1,
+    }
diff --git a/tests/test_prediction.py b/tests/test_prediction.py
@@ -14,7 +14,7 @@
     SegmentationPrediction,
 )
 from nucleus.constants import ERROR_PAYLOAD
-from nucleus.job import AsyncJob
+from nucleus.job import AsyncJob, JobError
 
 from .helpers import (
     TEST_BOX_PREDICTIONS,
@@ -24,6 +24,7 @@
     TEST_IMG_URLS,
     TEST_MODEL_NAME,
     TEST_MODEL_RUN,
+    TEST_NONEXISTENT_TAXONOMY_CATEGORY_PREDICTION,
     TEST_POLYGON_PREDICTIONS,
     TEST_SEGMENTATION_PREDICTIONS,
     assert_box_prediction_matches_dict,
@@ -119,7 +120,7 @@ def test_polygon_pred_upload(model_run):
     response = model_run.predict(annotations=[prediction])
 
     assert response["model_run_id"] == model_run.model_run_id
-    assert response["predictions_ignored"] == 0
+    assert response["predictions_processed"] == 1
     assert response["predictions_ignored"] == 0
 
     response = model_run.refloc(prediction.reference_id)["polygon"]
@@ -134,7 +135,7 @@ def test_category_pred_upload(model_run):
     response = model_run.predict(annotations=[prediction])
 
     assert response["model_run_id"] == model_run.model_run_id
-    assert response["predictions_ignored"] == 0
+    assert response["predictions_processed"] == 1
     assert response["predictions_ignored"] == 0
 
     response = model_run.refloc(prediction.reference_id)["category"]
@@ -151,7 +152,7 @@ def test_default_category_pred_upload(model_run):
     response = model_run.predict(annotations=[prediction])
 
     assert response["model_run_id"] == model_run.model_run_id
-    assert response["predictions_ignored"] == 0
+    assert response["predictions_processed"] == 1
     assert response["predictions_ignored"] == 0
 
     response = model_run.refloc(prediction.reference_id)["category"]
@@ -161,6 +162,20 @@ def test_default_category_pred_upload(model_run):
     )
 
 
+def test_non_existent_taxonomy_category_gt_upload(model_run):
+    """Sync predict with an unknown taxonomy reports a validation error."""
+    payload = TEST_NONEXISTENT_TAXONOMY_CATEGORY_PREDICTION[0]
+    response = model_run.predict(
+        annotations=[CategoryPrediction.from_json(payload)]
+    )
+
+    assert response["model_run_id"] == model_run.model_run_id
+    assert response["predictions_processed"] == 0
+    assert response["predictions_ignored"] == 0
+    expected = f'Input validation failed: Taxonomy {payload["taxonomy_name"]} does not exist in dataset'
+    assert expected in response["errors"][0]
+
+
 def test_segmentation_pred_upload(model_run):
     prediction = SegmentationPrediction.from_json(
         TEST_SEGMENTATION_PREDICTIONS[0]
@@ -576,3 +591,34 @@ def test_default_category_pred_upload_async(model_run: ModelRun):
         "completed_steps": 1,
         "total_steps": 1,
     }
+
+
+@pytest.mark.integration
+def test_non_existent_taxonomy_category_gt_upload_async(model_run: ModelRun):
+    """Async predict with an unknown taxonomy must error and report why."""
+    prediction = CategoryPrediction.from_json(
+        TEST_NONEXISTENT_TAXONOMY_CATEGORY_PREDICTION[0]
+    )
+    job: AsyncJob = model_run.predict(
+        annotations=[prediction], asynchronous=True
+    )
+
+    # Fail loudly if no JobError surfaces instead of skipping the check.
+    with pytest.raises(JobError):
+        job.sleep_until_complete()
+
+    assert (
+        f'Input validation failed: Taxonomy {TEST_NONEXISTENT_TAXONOMY_CATEGORY_PREDICTION[0]["taxonomy_name"]} does not exist in dataset'
+        in job.errors()[-1]
+    )
+
+    assert job.status() == {
+        "job_id": job.job_id,
+        "status": "Errored",
+        "message": {
+            "status_log": "No additional information can be provided at this time."
+        },
+        "job_progress": "0.00",
+        "completed_steps": 0,
+        "total_steps": 1,
+    }