
Commit fbc7208

Author: Diego Ardila
Merge branch 'master' into da-export-embeddings
2 parents 7d8037f + a5ef70f

File tree

9 files changed: +113 −9 lines


.gitignore

Lines changed: 3 additions & 0 deletions
@@ -134,3 +134,6 @@ dmypy.json
 
 # Poetry lockfile (no need for deploys, best practice is to not check this in)
 poetry.lock
+
+# vscode
+.vscode/

nucleus/autocurate.py

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+import datetime
+import requests
+from nucleus.constants import (
+    JOB_CREATION_TIME_KEY,
+    JOB_LAST_KNOWN_STATUS_KEY,
+    JOB_TYPE_KEY,
+)
+from nucleus.job import AsyncJob
+
+
+def entropy(name, model_runs, client):
+    assert (
+        len({model_run.dataset_id for model_run in model_runs}) == 1
+    ), f"Model runs have conflicting dataset ids: {model_runs}"
+    model_run_ids = [model_run.model_run_id for model_run in model_runs]
+    dataset_id = model_runs[0].dataset_id
+    response = client.make_request(
+        payload={"modelRunIds": model_run_ids},
+        route=f"autocurate/{dataset_id}/single_model_entropy/{name}",
+        requests_command=requests.post,
+    )
+    # TODO: the response should already have the below three fields populated
+    response[JOB_LAST_KNOWN_STATUS_KEY] = "Started"
+    response[JOB_TYPE_KEY] = "autocurateEntropy"
+    response[JOB_CREATION_TIME_KEY] = (
+        datetime.datetime.now().isoformat("T", "milliseconds") + "Z"
+    )
+    job = AsyncJob.from_json(response, client)
+    return job
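For context, a minimal usage sketch of the new helper; the API key, IDs, and job name are placeholders, and the ModelRun is constructed directly using the signature shown in the nucleus/model_run.py diff below:

import nucleus
from nucleus import autocurate
from nucleus.model_run import ModelRun

# Placeholder credentials and IDs -- substitute real values.
client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
run = ModelRun("run_abc123", "ds_xyz789", client)

# Kick off an async entropy-based autocurate job, then block until it finishes.
job = autocurate.entropy("My Autocurate Job", [run], client)
job.sleep_until_complete()
print(job.job_last_known_status)  # "Completed" on success, per the integration test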

nucleus/constants.py

Lines changed: 1 addition & 0 deletions
@@ -86,6 +86,7 @@
 TYPE_KEY = "type"
 UPDATED_ITEMS = "updated_items"
 UPDATE_KEY = "update"
+UPLOAD_TO_SCALE_KEY = "upload_to_scale"
 URL_KEY = "url"
 VERTICES_KEY = "vertices"
 WIDTH_KEY = "width"

nucleus/dataset_item.py

Lines changed: 8 additions & 0 deletions
@@ -10,6 +10,7 @@
     IMAGE_URL_KEY,
     METADATA_KEY,
     ORIGINAL_IMAGE_URL_KEY,
+    UPLOAD_TO_SCALE_KEY,
     REFERENCE_ID_KEY,
     TYPE_KEY,
     URL_KEY,
@@ -92,12 +93,17 @@ class DatasetItem:  # pylint: disable=R0902
     reference_id: Optional[str] = None
     metadata: Optional[dict] = None
     pointcloud_location: Optional[str] = None
+    upload_to_scale: Optional[bool] = True
 
     def __post_init__(self):
         assert self.reference_id is not None, "reference_id is required."
         assert bool(self.image_location) != bool(
             self.pointcloud_location
         ), "Must specify exactly one of the image_location, pointcloud_location parameters"
+        if self.pointcloud_location and not self.upload_to_scale:
+            raise NotImplementedError(
+                "Skipping upload to Scale is not currently implemented for pointclouds."
+            )
         self.local = (
             is_local_path(self.image_location) if self.image_location else None
         )
@@ -133,6 +139,7 @@ def from_json(cls, payload: dict, is_scene=False):
             image_location=image_url,
             reference_id=payload.get(REFERENCE_ID_KEY, None),
             metadata=payload.get(METADATA_KEY, {}),
+            upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, None),
         )
 
     def local_file_exists(self):
@@ -158,6 +165,7 @@ def to_payload(self, is_scene=False) -> dict:
             self.image_location
         ), "Must specify image_location for DatasetItems not in a LidarScene"
         payload[IMAGE_URL_KEY] = self.image_location
+        payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale
 
         return payload
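A short sketch of the new flag in use; the URL and reference ID are made up:

from nucleus import DatasetItem

# Privacy mode: register the image by URL but skip uploading the bytes to Scale.
item = DatasetItem(
    image_location="https://example.com/img_0001.jpg",  # placeholder URL
    reference_id="img_0001",
    upload_to_scale=False,
)

payload = item.to_payload()
assert payload["upload_to_scale"] is False  # serialized under UPLOAD_TO_SCALE_KEY

Note that per __post_init__ above, combining pointcloud_location with upload_to_scale=False raises NotImplementedError.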

nucleus/model_run.py

Lines changed: 3 additions & 3 deletions
@@ -31,10 +31,10 @@ class ModelRun:
     def __init__(self, model_run_id: str, dataset_id: str, client):
         self.model_run_id = model_run_id
         self._client = client
-        self._dataset_id = dataset_id
+        self.dataset_id = dataset_id
 
     def __repr__(self):
-        return f"ModelRun(model_run_id='{self.model_run_id}', dataset_id='{self._dataset_id}', client={self._client})"
+        return f"ModelRun(model_run_id='{self.model_run_id}', dataset_id='{self.dataset_id}', client={self._client})"
 
     def __eq__(self, other):
         if self.model_run_id == other.model_run_id:
@@ -115,7 +115,7 @@ def predict(
         check_all_mask_paths_remote(annotations)
 
         request_id = serialize_and_write_to_presigned_url(
-            annotations, self._dataset_id, self._client
+            annotations, self.dataset_id, self._client
         )
         response = self._client.make_request(
             payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
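Note: promoting the private _dataset_id to a public dataset_id attribute is what allows the new autocurate.entropy helper to read model_run.dataset_id when verifying that all passed model runs belong to the same dataset.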

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ flake8 = "^3.9.1"
 mypy = "^0.812"
 coverage = "^5.5"
 pre-commit = "^2.12.1"
+jupyterlab = "^3.1.10"
 
 [tool.pytest.ini_options]
 markers = [

tests/test_autocurate.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+from nucleus.prediction import BoxPrediction
+from nucleus.job import AsyncJob
+from nucleus import autocurate, DatasetItem
+import time
+from nucleus.constants import ERROR_PAYLOAD
+from tests.helpers import (
+    TEST_BOX_PREDICTIONS,
+    TEST_DATASET_NAME,
+    TEST_IMG_URLS,
+    TEST_MODEL_NAME,
+    TEST_MODEL_RUN,
+    reference_id_from_url,
+)
+import pytest
+
+
+@pytest.fixture()
+def model_run(CLIENT):
+    ds = CLIENT.create_dataset(TEST_DATASET_NAME)
+    ds_items = []
+    for url in TEST_IMG_URLS[:2]:
+        ds_items.append(
+            DatasetItem(
+                image_location=url,
+                reference_id=reference_id_from_url(url),
+            )
+        )
+
+    response = ds.append(ds_items)
+
+    assert ERROR_PAYLOAD not in response.json()
+
+    model = CLIENT.add_model(
+        name=TEST_MODEL_NAME, reference_id="model_" + str(time.time())
+    )
+
+    run = model.create_run(name=TEST_MODEL_RUN, dataset=ds, predictions=[])
+    prediction = BoxPrediction(**TEST_BOX_PREDICTIONS[1])
+    run.predict(annotations=[prediction])
+
+    yield run
+
+    response = CLIENT.delete_dataset(ds.id)
+    assert response == {"message": "Beginning dataset deletion..."}
+    response = CLIENT.delete_model(model.id)
+    assert response == {}
+
+
+@pytest.mark.integration
+def test_autocurate_integration(model_run, CLIENT):
+    job = autocurate.entropy(
+        "Test Autocurate Integration", [model_run], CLIENT
+    )
+    job.sleep_until_complete()
+    assert job.job_last_known_status == "Completed"

tests/test_dataset.py

Lines changed: 11 additions & 4 deletions
@@ -1,5 +1,7 @@
 import copy
 import math
+from nucleus.model import Model
+from nucleus.prediction import BoxPrediction
 import os
 
 import pytest
@@ -176,12 +178,17 @@ def check_is_expected_response(response):
 
     # Plain image upload
     ds_items_plain = []
-    for url in TEST_IMG_URLS:
+    for i, url in enumerate(TEST_IMG_URLS):
+        # Upload only the first item to Scale; the rest stay in privacy mode
+        upload_to_scale = i == 0
         ds_items_plain.append(
             DatasetItem(
-                image_location=url, reference_id=url.split("/")[-1] + "_plain"
+                image_location=url,
+                upload_to_scale=upload_to_scale,
+                reference_id=url.split("/")[-1] + "_plain",
             )
         )
+
     response = dataset.append(ds_items_plain)
     check_is_expected_response(response)
@@ -289,8 +296,8 @@ def test_dataset_append_async_with_1_bad_url(dataset: Dataset):
             "started_image_processing": f"Dataset: {dataset.id}, Job: {job.job_id}",
         },
         "job_progress": "1.00",
-        "completed_steps": 1,
-        "total_steps": 1,
+        "completed_steps": 4,
+        "total_steps": 4,
     }
     # The error is fairly detailed and subject to change. What's important is we surface which URLs failed.
     assert (

tests/test_prediction.py

Lines changed: 2 additions & 2 deletions
@@ -298,7 +298,7 @@ def test_mixed_pred_upload_async(model_run: ModelRun):
             "total": 2,
             "errored": 0,
             "ignored": 0,
-            "datasetId": model_run._dataset_id,
+            "datasetId": model_run.dataset_id,
             "processed": 2,
         },
         "segmentation_upload": {
@@ -339,7 +339,7 @@ def test_mixed_pred_upload_async_with_error(model_run: ModelRun):
             "total": 2,
             "errored": 1,
             "ignored": 0,
-            "datasetId": model_run._dataset_id,
+            "datasetId": model_run.dataset_id,
             "processed": 1,
         },
         "segmentation_upload": {
