Skip to content

Commit a5ef70f

Browse files
Authored merge commit a5ef70f — Merge pull request #112 from scaleapi/vinjai/autocurate:
"Add Autocurate to API, and add test"
(2 parents: 4de8dca + d3ee877)

File tree

6 files changed

+93
-5
lines changed

6 files changed

+93
-5
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,6 @@ dmypy.json
134134

135135
# Poetry lockfile (no need for deploys, best practice is to not check this in)
136136
poetry.lock
137+
138+
# vscode
139+
.vscode/

nucleus/autocurate.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
import datetime

import requests

from nucleus.constants import (
    JOB_CREATION_TIME_KEY,
    JOB_LAST_KNOWN_STATUS_KEY,
    JOB_TYPE_KEY,
)
from nucleus.job import AsyncJob


def entropy(name, model_runs, client):
    """Kick off an asynchronous "single model entropy" autocurate job.

    Args:
        name: Human-readable name for the curation job; embedded in the
            request route.
        model_runs: Non-empty list of ModelRun objects. All runs must
            belong to the same dataset.
        client: Nucleus client used to issue the POST request.

    Returns:
        AsyncJob: handle that can be polled for job status.

    Raises:
        AssertionError: if the model runs span more than one dataset.
    """
    assert (
        len({model_run.dataset_id for model_run in model_runs}) == 1
    ), f"Model runs have conflicting dataset ids: {model_runs}"
    model_run_ids = [model_run.model_run_id for model_run in model_runs]
    dataset_id = model_runs[0].dataset_id
    response = client.make_request(
        payload={"modelRunIds": model_run_ids},
        route=f"autocurate/{dataset_id}/single_model_entropy/{name}",
        requests_command=requests.post,
    )
    # TODO: the response should already have the below three fields populated
    response[JOB_LAST_KNOWN_STATUS_KEY] = "Started"
    response[JOB_TYPE_KEY] = "autocurateEntropy"
    # Bug fix: the original used naive datetime.now() (local time) and
    # appended "Z", mislabeling a local timestamp as UTC.  Stamp an
    # aware UTC time instead, keeping the same "...sssZ" wire format.
    response[JOB_CREATION_TIME_KEY] = (
        datetime.datetime.now(datetime.timezone.utc)
        .isoformat(timespec="milliseconds")
        .replace("+00:00", "Z")
    )
    job = AsyncJob.from_json(response, client)
    return job

nucleus/model_run.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ class ModelRun:
3131
    def __init__(self, model_run_id: str, dataset_id: str, client):
        """Store identifiers and the client used for API calls.

        Note: dataset_id is intentionally public (renamed from
        _dataset_id) so callers such as autocurate can read it.
        """
        self.model_run_id = model_run_id
        self._client = client
        self.dataset_id = dataset_id
3535

3636
    def __repr__(self):
        # Debug-friendly representation exposing the run, dataset, and client.
        return f"ModelRun(model_run_id='{self.model_run_id}', dataset_id='{self.dataset_id}', client={self._client})"
3838

3939
def __eq__(self, other):
4040
if self.model_run_id == other.model_run_id:
@@ -115,7 +115,7 @@ def predict(
115115
check_all_mask_paths_remote(annotations)
116116

117117
request_id = serialize_and_write_to_presigned_url(
118-
annotations, self._dataset_id, self._client
118+
annotations, self.dataset_id, self._client
119119
)
120120
response = self._client.make_request(
121121
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ flake8 = "^3.9.1"
4848
mypy = "^0.812"
4949
coverage = "^5.5"
5050
pre-commit = "^2.12.1"
51+
jupyterlab = "^3.1.10"
5152

5253
[tool.pytest.ini_options]
5354
markers = [

tests/test_autocurate.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from nucleus.prediction import BoxPrediction
2+
from nucleus.job import AsyncJob
3+
from nucleus import autocurate, DatasetItem
4+
import time
5+
from nucleus.constants import ERROR_PAYLOAD
6+
from tests.helpers import (
7+
TEST_BOX_PREDICTIONS,
8+
TEST_DATASET_NAME,
9+
TEST_IMG_URLS,
10+
TEST_MODEL_NAME,
11+
TEST_MODEL_RUN,
12+
reference_id_from_url,
13+
)
14+
import pytest
15+
16+
17+
@pytest.fixture()
def model_run(CLIENT):
    """Integration fixture: dataset + model + model run with one prediction.

    Yields the ModelRun, then tears down the dataset and model it created.
    """
    # Create a small dataset with two remote image items.
    ds = CLIENT.create_dataset(TEST_DATASET_NAME)
    ds_items = []
    for url in TEST_IMG_URLS[:2]:
        ds_items.append(
            DatasetItem(
                image_location=url,
                reference_id=reference_id_from_url(url),
            )
        )

    response = ds.append(ds_items)

    assert ERROR_PAYLOAD not in response.json()

    # Unique reference_id per invocation so repeated test runs don't collide.
    model = CLIENT.add_model(
        name=TEST_MODEL_NAME, reference_id="model_" + str(time.time())
    )

    # Create an empty run, then upload a single box prediction to it.
    run = model.create_run(name=TEST_MODEL_RUN, dataset=ds, predictions=[])
    prediction = BoxPrediction(**TEST_BOX_PREDICTIONS[1])
    run.predict(annotations=[prediction])

    yield run

    # Teardown: delete the dataset and model created above.
    response = CLIENT.delete_dataset(ds.id)
    assert response == {"message": "Beginning dataset deletion..."}
    response = CLIENT.delete_model(model.id)
    assert response == {}
assert response == {}
47+
48+
49+
@pytest.mark.integration
def test_autocurate_integration(model_run, CLIENT):
    """End-to-end check: start an entropy autocurate job and await success."""
    job = autocurate.entropy(
        "Test Autocurate Integration", [model_run], CLIENT
    )
    # Blocks (polling server-side) until the async job reaches a final state.
    job.sleep_until_complete()
    assert job.job_last_known_status == "Completed"

tests/test_prediction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ def test_mixed_pred_upload_async(model_run: ModelRun):
298298
"total": 2,
299299
"errored": 0,
300300
"ignored": 0,
301-
"datasetId": model_run._dataset_id,
301+
"datasetId": model_run.dataset_id,
302302
"processed": 2,
303303
},
304304
"segmentation_upload": {
@@ -339,7 +339,7 @@ def test_mixed_pred_upload_async_with_error(model_run: ModelRun):
339339
"total": 2,
340340
"errored": 1,
341341
"ignored": 0,
342-
"datasetId": model_run._dataset_id,
342+
"datasetId": model_run.dataset_id,
343343
"processed": 1,
344344
},
345345
"segmentation_upload": {

0 commit comments

Comments
 (0)