Skip to content

Commit a5ef70f

Browse files
Authored merge commit a5ef70f — Merge pull request #112 from scaleapi/vinjai/autocurate:
"Add Autocurate to API, and add test"
(2 parents: 4de8dca + d3ee877)

File tree

6 files changed

+93
-5
lines changed

6 files changed

+93
-5
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,6 @@ dmypy.json
134134

135135
# Poetry lockfile (no need for deploys, best practice is to not check this in)
136136
poetry.lock
137+
138+
# vscode
139+
.vscode/

nucleus/autocurate.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
import datetime

import requests

from nucleus.constants import (
    JOB_CREATION_TIME_KEY,
    JOB_LAST_KNOWN_STATUS_KEY,
    JOB_TYPE_KEY,
)
from nucleus.job import AsyncJob


def entropy(name, model_runs, client):
    """Kick off an asynchronous "single model entropy" autocurate job.

    Args:
        name: Human-readable name for the curation job; embedded in the
            request route.
        model_runs: Non-empty list of ModelRun objects. All runs must
            belong to the same dataset.
        client: Nucleus client used to issue the POST request.

    Returns:
        AsyncJob: handle that can be polled for job status.

    Raises:
        AssertionError: if the model runs span more than one dataset.
    """
    assert (
        len({model_run.dataset_id for model_run in model_runs}) == 1
    ), f"Model runs have conflicting dataset ids: {model_runs}"
    model_run_ids = [model_run.model_run_id for model_run in model_runs]
    dataset_id = model_runs[0].dataset_id
    response = client.make_request(
        payload={"modelRunIds": model_run_ids},
        route=f"autocurate/{dataset_id}/single_model_entropy/{name}",
        requests_command=requests.post,
    )
    # TODO: the response should already have the below three fields populated
    response[JOB_LAST_KNOWN_STATUS_KEY] = "Started"
    response[JOB_TYPE_KEY] = "autocurateEntropy"
    # Bug fix: the original used naive datetime.now() (local time) and
    # appended "Z", mislabeling a local timestamp as UTC.  Stamp an
    # aware UTC time instead, keeping the same "...sssZ" wire format.
    response[JOB_CREATION_TIME_KEY] = (
        datetime.datetime.now(datetime.timezone.utc)
        .isoformat(timespec="milliseconds")
        .replace("+00:00", "Z")
    )
    job = AsyncJob.from_json(response, client)
    return job

nucleus/model_run.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ class ModelRun:
3131
    def __init__(self, model_run_id: str, dataset_id: str, client):
        """Store identifiers and the client used for API calls.

        Note: dataset_id is intentionally public (renamed from
        _dataset_id) so callers such as autocurate can read it.
        """
        self.model_run_id = model_run_id
        self._client = client
        self.dataset_id = dataset_id
3535

3636
    def __repr__(self):
        # Debug-friendly representation exposing the run, dataset, and client.
        return f"ModelRun(model_run_id='{self.model_run_id}', dataset_id='{self.dataset_id}', client={self._client})"
3838

3939
def __eq__(self, other):
4040
if self.model_run_id == other.model_run_id:
@@ -115,7 +115,7 @@ def predict(
115115
check_all_mask_paths_remote(annotations)
116116

117117
request_id = serialize_and_write_to_presigned_url(
118-
annotations, self._dataset_id, self._client
118+
annotations, self.dataset_id, self._client
119119
)
120120
response = self._client.make_request(
121121
payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ flake8 = "^3.9.1"
4848
mypy = "^0.812"
4949
coverage = "^5.5"
5050
pre-commit = "^2.12.1"
51+
jupyterlab = "^3.1.10"
5152

5253
[tool.pytest.ini_options]
5354
markers = [

tests/test_autocurate.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from nucleus.prediction import BoxPrediction
2+
from nucleus.job import AsyncJob
3+
from nucleus import autocurate, DatasetItem
4+
import time
5+
from nucleus.constants import ERROR_PAYLOAD
6+
from tests.helpers import (
7+
TEST_BOX_PREDICTIONS,
8+
TEST_DATASET_NAME,
9+
TEST_IMG_URLS,
10+
TEST_MODEL_NAME,
11+
TEST_MODEL_RUN,
12+
reference_id_from_url,
13+
)
14+
import pytest
15+
16+
17+
@pytest.fixture()
def model_run(CLIENT):
    """Integration fixture: dataset + model + model run with one prediction.

    Yields the ModelRun, then tears down the dataset and model it created.
    """
    # Create a small dataset with two remote image items.
    ds = CLIENT.create_dataset(TEST_DATASET_NAME)
    ds_items = []
    for url in TEST_IMG_URLS[:2]:
        ds_items.append(
            DatasetItem(
                image_location=url,
                reference_id=reference_id_from_url(url),
            )
        )

    response = ds.append(ds_items)

    assert ERROR_PAYLOAD not in response.json()

    # Unique reference_id per invocation so repeated test runs don't collide.
    model = CLIENT.add_model(
        name=TEST_MODEL_NAME, reference_id="model_" + str(time.time())
    )

    # Create an empty run, then upload a single box prediction to it.
    run = model.create_run(name=TEST_MODEL_RUN, dataset=ds, predictions=[])
    prediction = BoxPrediction(**TEST_BOX_PREDICTIONS[1])
    run.predict(annotations=[prediction])

    yield run

    # Teardown: delete the dataset and model created above.
    response = CLIENT.delete_dataset(ds.id)
    assert response == {"message": "Beginning dataset deletion..."}
    response = CLIENT.delete_model(model.id)
    assert response == {}
assert response == {}
47+
48+
49+
@pytest.mark.integration
def test_autocurate_integration(model_run, CLIENT):
    """End-to-end check: start an entropy autocurate job and await success."""
    job = autocurate.entropy(
        "Test Autocurate Integration", [model_run], CLIENT
    )
    # Blocks (polling server-side) until the async job reaches a final state.
    job.sleep_until_complete()
    assert job.job_last_known_status == "Completed"

tests/test_prediction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ def test_mixed_pred_upload_async(model_run: ModelRun):
298298
"total": 2,
299299
"errored": 0,
300300
"ignored": 0,
301-
"datasetId": model_run._dataset_id,
301+
"datasetId": model_run.dataset_id,
302302
"processed": 2,
303303
},
304304
"segmentation_upload": {
@@ -339,7 +339,7 @@ def test_mixed_pred_upload_async_with_error(model_run: ModelRun):
339339
"total": 2,
340340
"errored": 1,
341341
"ignored": 0,
342-
"datasetId": model_run._dataset_id,
342+
"datasetId": model_run.dataset_id,
343343
"processed": 1,
344344
},
345345
"segmentation_upload": {

0 commit comments

Comments
 (0)