Commit 4a23623

Drew Kaul authored and committed
merge with master
2 parents b43a059 + 16dda47 commit 4a23623

File tree

8 files changed: +85 -1 lines changed

nucleus/__init__.py

Lines changed: 32 additions & 0 deletions
@@ -94,6 +94,7 @@
     JOB_CREATION_TIME_KEY,
     IMAGE_KEY,
     IMAGE_URL_KEY,
+    INDEX_CONTINUOUS_ENABLE_KEY,
     ITEM_METADATA_SCHEMA_KEY,
     ITEMS_KEY,
     KEEP_HISTORY_KEY,
@@ -1206,6 +1207,37 @@ def delete_custom_index(self, dataset_id: str):
             requests_command=requests.delete,
         )
 
+    def set_continuous_indexing(self, dataset_id: str, enable: bool = True):
+        """
+        Sets continuous indexing for a given dataset, which automatically generates
+        embeddings whenever new images are uploaded. This endpoint is currently only
+        enabled for enterprise customers. Please reach out to nucleus@scale.com if
+        you wish to learn more.
+
+        :param dataset_id: id of the dataset for which continuous indexing is being toggled
+        :param enable: whether to enable or disable continuous indexing (enabled by default)
+        """
+        return self.make_request(
+            {INDEX_CONTINUOUS_ENABLE_KEY: enable},
+            f"indexing/{dataset_id}/setContinuous",
+            requests_command=requests.post,
+        )
+
+    def create_image_index(self, dataset_id: str):
+        """
+        Starts generating embeddings for images in a given dataset that don't yet have
+        them. These embeddings are used for autotag and similarity search. This endpoint
+        is currently only enabled for enterprise customers. Please reach out to
+        nucleus@scale.com if you wish to learn more.
+
+        :param dataset_id: id of the dataset to generate embeddings for
+        """
+        return self.make_request(
+            {},
+            f"indexing/{dataset_id}/internal/image",
+            requests_command=requests.post,
+        )
+
     def make_request(
         self, payload: dict, route: str, requests_command=requests.post
     ) -> dict:
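
For reference, a minimal usage sketch of the two new client methods (the API key and dataset id below are placeholders; both endpoints are enterprise-only per the docstrings):

    import nucleus

    client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")

    # Enable automatic embedding generation for future image uploads.
    client.set_continuous_indexing("ds_sample_dataset", enable=True)

    # Backfill embeddings for images that don't have them yet.
    client.create_image_index("ds_sample_dataset")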

nucleus/constants.py

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@
 IMAGE_LOCATION_KEY = "image_location"
 IMAGE_URL_KEY = "image_url"
 INDEX_KEY = "index"
+INDEX_CONTINUOUS_ENABLE_KEY = "enable"
 ITEMS_KEY = "items"
 ITEM_ID_KEY = "item_id"
 ITEM_KEY = "item"

nucleus/dataset.py

Lines changed: 7 additions & 0 deletions
@@ -400,6 +400,13 @@ def create_custom_index(self, embeddings_urls: list, embedding_dim: int):
     def delete_custom_index(self):
         return self._client.delete_custom_index(self.id)
 
+    def set_continuous_indexing(self, enable: bool = True):
+        return self._client.set_continuous_indexing(self.id, enable)
+
+    def create_image_index(self):
+        response = self._client.create_image_index(self.id)
+        return AsyncJob.from_json(response, self._client)
+
     def check_index_status(self, job_id: str):
         return self._client.check_index_status(job_id)
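
The Dataset wrappers delegate to the client; note that create_image_index additionally wraps the response in an AsyncJob, so callers can block on completion. A small sketch (API key and dataset id are placeholders):

    import nucleus

    client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
    dataset = client.get_dataset("ds_sample_dataset")

    dataset.set_continuous_indexing(enable=True)

    job = dataset.create_image_index()  # wrapped into an AsyncJob
    job.sleep_until_complete()          # block until indexing finishes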

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.1.15"
+version = "0.1.16"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license = "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_dataset.py

Lines changed: 29 additions & 0 deletions
@@ -226,7 +226,16 @@ def test_dataset_append_async(dataset: Dataset):
         "PayloadUrl": "",
         "image_upload_step": {"errored": 0, "pending": 0, "completed": 5},
         "started_image_processing": f"Dataset: {dataset.id}, Job: {job.job_id}",
+        "ingest_to_reupload_queue": {
+            "epoch": 1,
+            "total": 5,
+            "datasetId": f"{dataset.id}",
+            "processed": 5,
+        },
     },
+    "job_progress": "1.00",
+    "completed_steps": 5,
+    "total_steps": 5,
 }
@@ -253,9 +262,23 @@ def test_dataset_append_async_with_1_bad_url(dataset: Dataset):
     "status": "Errored",
     "message": {
         "PayloadUrl": "",
+        "final_error": (
+            "One or more of the images you attempted to upload did not process"
+            " correctly. Please see the status for an overview and the errors for "
+            "more detailed messages."
+        ),
         "image_upload_step": {"errored": 1, "pending": 0, "completed": 4},
+        "ingest_to_reupload_queue": {
+            "epoch": 1,
+            "total": 5,
+            "datasetId": f"{dataset.id}",
+            "processed": 5,
+        },
         "started_image_processing": f"Dataset: {dataset.id}, Job: {job.job_id}",
     },
+    "job_progress": "1.00",
+    "completed_steps": 1,
+    "total_steps": 1,
 }
 # The error is fairly detailed and subject to change. What's important is we surface which URLs failed.
 assert (
@@ -337,6 +360,9 @@ def test_annotate_async(dataset: Dataset):
             "processed": 1,
         },
     },
+    "job_progress": "1.00",
+    "completed_steps": 3,
+    "total_steps": 3,
 }
@@ -372,6 +398,9 @@ def test_annotate_async_with_error(dataset: Dataset):
             "processed": 1,
         },
     },
+    "job_progress": "0.67",
+    "completed_steps": 2,
+    "total_steps": 3,
 }
 
 assert "Item with id fake_garbage doesn" in str(job.errors())

tests/test_indexing.py

Lines changed: 8 additions & 0 deletions
@@ -57,3 +57,11 @@ def test_index_integration(dataset):
     assert STATUS_KEY in job_status_response
     assert JOB_ID_KEY in job_status_response
     assert MESSAGE_KEY in job_status_response
+
+
+@pytest.mark.skip(reason="Times out consistently")
+def test_generate_image_index_integration(dataset):
+    job = dataset.create_image_index()
+    job.sleep_until_complete()
+    job.status()
+    assert job.job_last_known_status == "Completed"

tests/test_prediction.py

Lines changed: 6 additions & 0 deletions
@@ -307,6 +307,9 @@ def test_mixed_pred_upload_async(model_run: ModelRun):
             "processed": 1,
         },
     },
+    "job_progress": "1.00",
+    "completed_steps": 3,
+    "total_steps": 3,
 }
@@ -345,6 +348,9 @@ def test_mixed_pred_upload_async_with_error(model_run: ModelRun):
             "processed": 1,
         },
     },
+    "job_progress": "0.67",
+    "completed_steps": 2,
+    "total_steps": 3,
 }
 
 assert "Item with id fake_garbage doesn" in str(job.errors())

tests/test_slice.py

Lines changed: 1 addition & 0 deletions
@@ -155,6 +155,7 @@ def sort_by_reference_id(items):
 )
 
 
+@pytest.mark.skip(reason="404 not found error")
 @pytest.mark.integration
 def test_slice_send_to_labeling(dataset):
     # Dataset upload
