Skip to content

Commit 2f7a913

Browse files
authored
Merge pull request #98 from scaleapi/jihan/indexing-api
API for continuous indexing and manual image indexing
2 parents 9247aa9 + fcaf243 commit 2f7a913

File tree

5 files changed

+48
-1
lines changed

5 files changed

+48
-1
lines changed

nucleus/__init__.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
JOB_CREATION_TIME_KEY,
9595
IMAGE_KEY,
9696
IMAGE_URL_KEY,
97+
INDEX_CONTINUOUS_ENABLE_KEY,
9798
ITEM_METADATA_SCHEMA_KEY,
9899
ITEMS_KEY,
99100
KEEP_HISTORY_KEY,
@@ -1206,6 +1207,37 @@ def delete_custom_index(self, dataset_id: str):
12061207
requests_command=requests.delete,
12071208
)
12081209

1210+
def set_continuous_indexing(self, dataset_id: str, enable: bool = True):
    """
    Toggle continuous indexing for a dataset.

    With continuous indexing on, embeddings are generated automatically
    whenever new images are uploaded. This endpoint is currently only
    enabled for enterprise customers; reach out to nucleus@scale.com if
    you wish to learn more.

    :param dataset_id: id of the dataset that continuous indexing is being
        toggled for.
    :param enable: True to enable continuous indexing, False to disable it.
        Defaults to True.
    :return: the result of ``make_request`` for this call.
    """
    # Request body carries only the on/off flag.
    payload = {INDEX_CONTINUOUS_ENABLE_KEY: enable}
    route = f"indexing/{dataset_id}/setContinuous"
    return self.make_request(payload, route, requests_command=requests.post)
1225+
1226+
def create_image_index(self, dataset_id: str):
    """
    Start generating embeddings for the images in a dataset that do not
    have embeddings yet.

    The embeddings are used for autotag and similarity search. This
    endpoint is currently only enabled for enterprise customers; reach out
    to nucleus@scale.com if you wish to learn more.

    :param dataset_id: id of the dataset to generate embeddings for.
    :return: the result of ``make_request`` for this call.
    """
    # The endpoint takes no parameters, so the request body is empty.
    empty_payload = {}
    route = f"indexing/{dataset_id}/internal/image"
    return self.make_request(
        empty_payload, route, requests_command=requests.post
    )
1240+
12091241
def make_request(
12101242
self, payload: dict, route: str, requests_command=requests.post
12111243
) -> dict:

nucleus/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
IMAGE_LOCATION_KEY = "image_location"
4545
IMAGE_URL_KEY = "image_url"
4646
INDEX_KEY = "index"
47+
INDEX_CONTINUOUS_ENABLE_KEY = "enable"
4748
ITEMS_KEY = "items"
4849
ITEM_ID_KEY = "item_id"
4950
ITEM_KEY = "item"

nucleus/dataset.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,13 @@ def create_custom_index(self, embeddings_urls: list, embedding_dim: int):
400400
def delete_custom_index(self):
    """Delete the custom index by delegating to the client with this dataset's id."""
    dataset_id = self.id
    return self._client.delete_custom_index(dataset_id)
402402

403+
def set_continuous_indexing(self, enable: bool = True):
    """
    Toggle continuous indexing for this dataset.

    Delegates to the client's ``set_continuous_indexing`` with this
    dataset's id.

    :param enable: True to enable, False to disable. Defaults to True.
    :return: whatever the client call returns.
    """
    client = self._client
    return client.set_continuous_indexing(self.id, enable)
405+
406+
def create_image_index(self):
    """
    Request image indexing for this dataset and wrap the response in a job.

    :return: the object built by ``AsyncJob.from_json`` from the client's
        response.
    """
    client = self._client
    # Issue the request first, then wrap the raw response.
    job_payload = client.create_image_index(self.id)
    return AsyncJob.from_json(job_payload, client)
409+
403410
def check_index_status(self, job_id: str):
    """
    Look up the status of an indexing job through the client.

    :param job_id: id of the indexing job to query.
    :return: whatever the client's ``check_index_status`` returns.
    """
    client = self._client
    return client.check_index_status(job_id)
405412

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.1.15"
24+
version = "0.1.16"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_indexing.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,10 @@ def test_index_integration(dataset):
5757
assert STATUS_KEY in job_status_response
5858
assert JOB_ID_KEY in job_status_response
5959
assert MESSAGE_KEY in job_status_response
60+
61+
62+
def test_generate_image_index_integration(dataset):
    """Integration check: an image-index job on the dataset ends as "Completed"."""
    indexing_job = dataset.create_image_index()
    indexing_job.sleep_until_complete()
    # Return value is unused; presumably this refreshes
    # job_last_known_status -- confirm against AsyncJob.
    indexing_job.status()
    expected_status = "Completed"
    assert indexing_job.job_last_known_status == expected_status

0 commit comments

Comments
 (0)