@@ -323,7 +323,7 @@ def populate_dataset(
323
323
self ,
324
324
dataset_id : str ,
325
325
dataset_items : List [DatasetItem ],
326
- batch_size : int = 100 ,
326
+ batch_size : int = 30 ,
327
327
update : bool = False ,
328
328
):
329
329
"""
@@ -1180,7 +1180,8 @@ def set_continuous_indexing(self, dataset_id: str, enable: bool = True):
1180
1180
def create_image_index (self , dataset_id : str ):
1181
1181
"""
1182
1182
Starts generating embeddings for images that don't have embeddings in a given dataset. These embeddings will
1183
- be used for autotag and similarity search. This endpoint is currently only enabled for enterprise customers.
1183
+ be used for autotag and similarity search. This endpoint is limited to generating embeddings for 2 million
1184
+ images at a time. This endpoint is also currently only enabled for enterprise customers.
1184
1185
Please reach out to nucleus@scale.com if you wish to learn more.
1185
1186
1186
1187
:param
@@ -1192,6 +1193,33 @@ def create_image_index(self, dataset_id: str):
1192
1193
requests_command = requests .post ,
1193
1194
)
1194
1195
1196
def create_object_index(
    self, dataset_id: str, model_run_id: str, gt_only: bool
):
    """
    Starts generating embeddings for objects that don't have embeddings in a given dataset. These embeddings will
    be used for autotag and similarity search. This endpoint only supports indexing objects sourced from the predictions
    of a single model run or the ground truth annotations of a dataset.

    This endpoint is limited to generating embeddings for 3 million objects at a time. This endpoint is also currently
    only enabled for enterprise customers. Please reach out to nucleus@scale.com if you wish to learn more.

    :param
    dataset_id: id of dataset for generating embeddings on.
    model_run_id: id of the model run for generating embeddings on. Mutually exclusive with gt_only;
        when a non-empty model_run_id is given, gt_only is ignored.
    gt_only: Whether we are generating embeddings on the ground truth objects in a dataset. Mutually exclusive
        with model_run_id; only consulted when model_run_id is empty.

    :return: the dict returned by make_request for the object indexing route.
    """
    # Exactly one source is sent: a model run id takes precedence over the
    # ground-truth flag. If neither is set, the payload is sent empty.
    payload: Dict[str, Union[str, bool]] = {}
    if model_run_id:
        payload["model_run_id"] = model_run_id
    elif gt_only:
        payload["ingest_gt_only"] = True

    # The route must not contain whitespace: a space after the dataset id
    # would become part of the URL path and miss the intended endpoint.
    return self.make_request(
        payload,
        f"indexing/{dataset_id}/internal/object",
        requests_command=requests.post,
    )
1222
+
1195
1223
def make_request (
1196
1224
self , payload : dict , route : str , requests_command = requests .post
1197
1225
) -> dict :
0 commit comments