
Commit 0464cae

Merge pull request #136 from scaleapi/jihan/object-indexing
Add object indexing support
2 parents: fd633b5 + d994e24

File tree

3 files changed, +39 -3 lines changed


nucleus/__init__.py

Lines changed: 30 additions & 2 deletions
@@ -323,7 +323,7 @@ def populate_dataset(
         self,
         dataset_id: str,
         dataset_items: List[DatasetItem],
-        batch_size: int = 100,
+        batch_size: int = 30,
         update: bool = False,
     ):
         """
@@ -1180,7 +1180,8 @@ def set_continuous_indexing(self, dataset_id: str, enable: bool = True):
     def create_image_index(self, dataset_id: str):
         """
         Starts generating embeddings for images that don't have embeddings in a given dataset. These embeddings will
-        be used for autotag and similarity search. This endpoint is currently only enabled for enterprise customers.
+        be used for autotag and similarity search. This endpoint is limited to generating embeddings for 2 million
+        images at a time. This endpoint is also currently only enabled for enterprise customers.
         Please reach out to nucleus@scale.com if you wish to learn more.

         :param
@@ -1192,6 +1193,33 @@ def create_image_index(self, dataset_id: str):
             requests_command=requests.post,
         )

+    def create_object_index(
+        self, dataset_id: str, model_run_id: str, gt_only: bool
+    ):
+        """
+        Starts generating embeddings for objects that don't have embeddings in a given dataset. These embeddings will
+        be used for autotag and similarity search. This endpoint only supports indexing objects sourced from the predictions
+        of a single model run or the ground truth annotations of a dataset.
+
+        This endpoint is limited to generating embeddings for 3 million objects at a time. This endpoint is also currently
+        only enabled for enterprise customers. Please reach out to nucleus@scale.com if you wish to learn more.
+
+        :param
+            dataset_id: id of dataset for generating embeddings on.
+            model_run_id: id of the model run for generating embeddings on. Mutually exclusive with gt_only
+            gt_only: Whether we are generating embeddings on the ground truth objects in a dataset. Mutually exclusive with model_run_id
+        """
+        payload: Dict[str, Union[str, bool]] = {}
+        if model_run_id:
+            payload["model_run_id"] = model_run_id
+        elif gt_only:
+            payload["ingest_gt_only"] = True
+        return self.make_request(
+            payload,
+            f"indexing/{dataset_id}/internal/object",
+            requests_command=requests.post,
+        )
+
     def make_request(
         self, payload: dict, route: str, requests_command=requests.post
     ) -> dict:
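For reference, a minimal usage sketch of the new client-level method added above. It assumes the client is constructed as nucleus.NucleusClient(API_KEY), which comes from the rest of the library rather than this diff, and uses placeholder dataset and model run ids; only one of model_run_id / gt_only should be supplied, matching the mutually exclusive payload logic in the method body.

```python
# Sketch only: placeholder API key and ids; NucleusClient construction is
# assumed from the existing library surface, not from this diff.
import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")

# Index objects sourced from a single model run's predictions
# (sends {"model_run_id": ...} to indexing/{dataset_id}/internal/object).
client.create_object_index(
    dataset_id="ds_sample_dataset_id",        # hypothetical id
    model_run_id="run_sample_model_run_id",   # hypothetical id
    gt_only=False,
)

# Or, mutually exclusively, index the dataset's ground truth annotations
# (sends {"ingest_gt_only": True}).
client.create_object_index(
    dataset_id="ds_sample_dataset_id",
    model_run_id=None,
    gt_only=True,
)
```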

nucleus/dataset.py

Lines changed: 8 additions & 0 deletions
@@ -453,6 +453,14 @@ def create_image_index(self):
         response = self._client.create_image_index(self.id)
         return AsyncJob.from_json(response, self._client)

+    def create_object_index(
+        self, model_run_id: str = None, gt_only: bool = None
+    ):
+        response = self._client.create_object_index(
+            self.id, model_run_id, gt_only
+        )
+        return AsyncJob.from_json(response, self._client)
+
     def add_taxonomy(
         self,
         taxonomy_name: str,
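A corresponding sketch at the Dataset level, assuming client.get_dataset(...) returns a Dataset object as elsewhere in this client (not part of this diff); the ids are placeholders. Per the diff, the wrapper forwards to the client's create_object_index and wraps the response in an AsyncJob.

```python
# Sketch only: get_dataset usage is assumed from the existing client surface;
# ids are placeholders.
import nucleus

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")
dataset = client.get_dataset("ds_sample_dataset_id")  # hypothetical id

# Kick off object indexing over the dataset's ground truth annotations only.
job = dataset.create_object_index(gt_only=True)

# `job` is an AsyncJob (per the diff above), which can be polled for
# completion using the client's async-job helpers.
```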

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ exclude = '''

 [tool.poetry]
 name = "scale-nucleus"
-version = "0.1.24"
+version = "0.1.25"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license = "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
