Skip to content

Commit 83bab96

Browse files
authored
Merge pull request #121 from scaleapi/jihan/autotag-api
Support for new autotag exporting apis
2 parents d653ba3 + af05135 commit 83bab96

File tree

5 files changed

+91
-16
lines changed

5 files changed

+91
-16
lines changed

nucleus/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
ANNOTATION_TYPES = (BOX_TYPE, POLYGON_TYPE, SEGMENTATION_TYPE, CUBOID_TYPE)
1414
ANNOTATION_UPDATE_KEY = "update"
1515
AUTOTAGS_KEY = "autotags"
16+
AUTOTAG_SCORE_THRESHOLD = "score_threshold"
1617
EXPORTED_ROWS = "exportedRows"
1718
CAMERA_PARAMS_KEY = "camera_params"
1819
CLASS_PDF_KEY = "class_pdf"

nucleus/dataset.py

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
NAME_KEY,
2525
REFERENCE_IDS_KEY,
2626
REQUEST_ID_KEY,
27+
AUTOTAG_SCORE_THRESHOLD,
2728
UPDATE_KEY,
2829
)
2930
from .dataset_item import (
@@ -87,21 +88,55 @@ def items(self) -> List[DatasetItem]:
8788
return self._client.get_dataset_items(self.id)
8889

8990
@sanitize_string_args
90-
def autotag_scores(self, autotag_name, for_scores_greater_than=0):
91-
"""Export the autotag scores above a threshold, largest scores first.
91+
def autotag_items(self, autotag_name, for_scores_greater_than=0):
92+
"""For a given Autotag of this dataset, export its tagged items with scores above a threshold, largest scores first.
9293
93-
If you have pandas installed, you can create a pandas dataframe using
94+
:return: dictionary of the form
95+
{
96+
'autotagItems': {
97+
ref_id: str,
98+
score: float,
99+
model_prediction_id: str | None,
100+
ground_truth_annotation_id: str | None,
101+
}[],
102+
'autotag': {
103+
id: str,
104+
name: str,
105+
status: 'started' | 'completed',
106+
autotag_level: 'Image' | 'Object'
107+
}
108+
}
109+
See https://dashboard.nucleus.scale.com/nucleus/docs/api#export-autotag-items for more details on the return types.
110+
"""
111+
response = self._client.make_request(
112+
payload={AUTOTAG_SCORE_THRESHOLD: for_scores_greater_than},
113+
route=f"autotag/dataset/{self.id}/autotag/{autotag_name}/taggedItems",
114+
requests_command=requests.get,
115+
)
116+
return response
94117

95-
pandas.Dataframe(dataset.autotag_scores(autotag_name))
118+
def autotag_training_items(self, autotag_name):
119+
"""For a given Autotag of this dataset, export its training items. These are user-selected positives during refinement.
96120
97121
:return: dictionary of the form
98-
{'ref_ids': List[str],
99-
'datset_item_ids': List[str],
100-
'score': List[float]}
122+
{
123+
'autotagPositiveTrainingItems': {
124+
ref_id: str,
125+
model_prediction_id: str | None,
126+
ground_truth_annotation_id: str | None,
127+
}[],
128+
'autotag': {
129+
id: str,
130+
name: str,
131+
status: 'started' | 'completed',
132+
autotag_level: 'Image' | 'Object'
133+
}
134+
}
135+
See https://dashboard.nucleus.scale.com/nucleus/docs/api#export-autotag-training-items for more details on the return types.
101136
"""
102137
response = self._client.make_request(
103138
payload={},
104-
route=f"autotag/{self.id}/{autotag_name}/{for_scores_greater_than}",
139+
route=f"autotag/dataset/{self.id}/autotag/{autotag_name}/trainingItems",
105140
requests_command=requests.get,
106141
)
107142
return response

nucleus/dataset_item.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,5 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]):
193193
for key, value in Counter(ref_ids).items()
194194
}
195195
raise ValueError(
196-
"Duplicate reference ids found among dataset_items: %s"
197-
% duplicates
196+
f"Duplicate reference ids found among dataset_items: {duplicates}"
198197
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.1.19"
24+
version = "0.1.20"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_dataset.py

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def test_raises_error_for_duplicate():
329329
)
330330

331331

332-
def test_dataset_export_autotag_scores(CLIENT):
332+
def test_dataset_export_autotag_tagged_items(CLIENT):
333333
# This test can only run for the test user who has an indexed dataset.
334334
# TODO: if/when we can create autotags via api, create one instead.
335335
if NUCLEUS_PYTEST_USER_ID in CLIENT.api_key:
@@ -342,11 +342,51 @@ def test_dataset_export_autotag_scores(CLIENT):
342342
in str(api_error.value)
343343
)
344344

345-
scores = dataset.autotag_scores(autotag_name="PytestTestTag")
345+
items = dataset.autotag_items(autotag_name="PytestTestTag")
346346

347-
for column in ["dataset_item_ids", "ref_ids", "scores"]:
348-
assert column in scores
349-
assert len(scores[column]) > 0
347+
assert "autotagItems" in items
348+
assert "autotag" in items
349+
350+
autotagItems = items["autotagItems"]
351+
autotag = items["autotag"]
352+
353+
assert len(autotagItems) > 0
354+
for item in autotagItems:
355+
for column in ["ref_id", "score"]:
356+
assert column in item
357+
358+
for column in ["id", "name", "status", "autotag_level"]:
359+
assert column in autotag
360+
361+
362+
def test_dataset_export_autotag_training_items(CLIENT):
363+
# This test can only run for the test user who has an indexed dataset.
364+
# TODO: if/when we can create autotags via api, create one instead.
365+
if NUCLEUS_PYTEST_USER_ID in CLIENT.api_key:
366+
dataset = CLIENT.get_dataset(DATASET_WITH_AUTOTAG)
367+
368+
with pytest.raises(NucleusAPIError) as api_error:
369+
dataset.autotag_training_items(autotag_name="NONSENSE_GARBAGE")
370+
assert (
371+
f"The autotag NONSENSE_GARBAGE was not found in dataset {DATASET_WITH_AUTOTAG}"
372+
in str(api_error.value)
373+
)
374+
375+
items = dataset.autotag_training_items(autotag_name="PytestTestTag")
376+
377+
assert "autotagPositiveTrainingItems" in items
378+
assert "autotag" in items
379+
380+
autotagTrainingItems = items["autotagPositiveTrainingItems"]
381+
autotag = items["autotag"]
382+
383+
assert len(autotagTrainingItems) > 0
384+
for item in autotagTrainingItems:
385+
for column in ["ref_id"]:
386+
assert column in item
387+
388+
for column in ["id", "name", "status", "autotag_level"]:
389+
assert column in autotag
350390

351391

352392
@pytest.mark.integration

0 commit comments

Comments
 (0)