1
1
"""
2
2
Nucleus Python Library.
3
3
4
- Data formats used:
5
-
6
- _____________________________________________________________________________________________________
7
-
8
- DatasetItem
9
-
10
- image_url | str | The URL containing the image for the given row of data.\n
11
- reference_id | str | An optional user-specified identifier to reference this given image.\n
12
- metadata | dict | All of column definitions for this item.
13
- | | The keys should match the user-specified column names,
14
- | | and the corresponding values will populate the cell under the column.\n
15
- _____________________________________________________________________________________________________
16
-
17
-
18
- Box2DGeometry:
19
-
20
- x | float | The distance, in pixels, between the left border of the bounding box
21
- | | and the left border of the image.\n
22
- y | float | The distance, in pixels, between the top border of the bounding box
23
- | | and the top border of the image.\n
24
- width | float | The width in pixels of the annotation.\n
25
- height | float | The height in pixels of the annotation.\n
26
-
27
- Box2DAnnotation:
28
-
29
- item_id | str | The internally-controlled item identifier to associate this annotation with.
30
- | | The reference_id field should be empty if this field is populated.\n
31
- reference_id | str | The user-specified reference identifier to associate this annotation with.\n
32
- | | The item_id field should be empty if this field is populated.
33
- label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
34
- type | str | The type of this annotation. It should always be the box string literal.\n
35
- geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
36
- metadata | dict | An arbitrary metadata blob for the annotation.\n
37
-
38
- _____________________________________________________________________________________________________
39
-
40
- Box2DDetection:
41
-
42
- item_id | str | The internally-controlled item identifier to associate this annotation with.
43
- | | The reference_id field should be empty if this field is populated.\n
44
- reference_id | str | The user-specified reference identifier to associate this annotation with.
45
- | | The item_id field should be empty if this field is populated.\n
46
- label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
47
- type | str | The type of this annotation. It should always be the box string literal.\n
48
- confidence | float | The optional confidence level of this annotation.
49
- | | It should be between 0 and 1 (inclusive).\n
50
- geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
51
- metadata | dict | An arbitrary metadata blob for the annotation.\n
4
+ For full documentation see: https://dashboard.scale.com/nucleus/docs/api?language=python
52
5
"""
53
6
import asyncio
54
7
import json
82
35
ANNOTATIONS_PROCESSED_KEY ,
83
36
AUTOTAGS_KEY ,
84
37
DATASET_ID_KEY ,
85
- DATASET_ITEM_IDS_KEY ,
86
38
DEFAULT_NETWORK_TIMEOUT_SEC ,
87
39
EMBEDDING_DIMENSION_KEY ,
88
40
EMBEDDINGS_URL_KEY ,
@@ -245,11 +197,8 @@ def get_dataset_items(self, dataset_id) -> List[DatasetItem]:
245
197
for item in dataset_items :
246
198
image_url = item .get ("original_image_url" )
247
199
metadata = item .get ("metadata" , None )
248
- item_id = item .get ("id" , None )
249
200
ref_id = item .get ("ref_id" , None )
250
- dataset_item = DatasetItem (
251
- image_url , ref_id , item_id , metadata
252
- )
201
+ dataset_item = DatasetItem (image_url , ref_id , metadata )
253
202
constructed_dataset_items .append (dataset_item )
254
203
elif error :
255
204
raise DatasetItemRetrievalError (message = error )
@@ -350,26 +299,19 @@ def delete_dataset(self, dataset_id: str) -> dict:
350
299
return self .make_request ({}, f"dataset/{ dataset_id } " , requests .delete )
351
300
352
301
@sanitize_string_args
353
- def delete_dataset_item (
354
- self , dataset_id : str , item_id : str = None , reference_id : str = None
355
- ) -> dict :
302
+ def delete_dataset_item (self , dataset_id : str , reference_id ) -> dict :
356
303
"""
357
304
Deletes a single item from a dataset, identified by its reference_id.
358
305
Returns an empty payload where response status `200` indicates
359
306
the dataset item has been successfully deleted.
360
307
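:param dataset_id: id of the dataset that contains the item
:param reference_id: user-specified reference identifier of the item to delete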
361
308
:return: { "dataset_id": str, "name": str }
362
309
"""
363
- if item_id :
364
- return self .make_request (
365
- {}, f"dataset/{ dataset_id } /{ item_id } " , requests .delete
366
- )
367
- else : # Assume reference_id is provided
368
- return self .make_request (
369
- {},
370
- f"dataset/{ dataset_id } /refloc/{ reference_id } " ,
371
- requests .delete ,
372
- )
310
+ return self .make_request (
311
+ {},
312
+ f"dataset/{ dataset_id } /refloc/{ reference_id } " ,
313
+ requests .delete ,
314
+ )
373
315
374
316
def populate_dataset (
375
317
self ,
@@ -1016,17 +958,13 @@ def create_slice(self, dataset_id: str, payload: dict) -> Slice:
1016
958
as a means of identifying items in the dataset.
1017
959
1018
960
"name" -- The human-readable name of the slice.
1019
-
1020
- "dataset_item_ids" -- An optional list of dataset item ids for the items in the slice
1021
-
1022
961
"reference_ids" -- An optional list of user-specified identifier for the items in the slice
1023
962
1024
963
:param
1025
964
dataset_id: id of the dataset
1026
965
payload:
1027
966
{
1028
967
"name": str,
1029
- "dataset_item_ids": List[str],
1030
968
"reference_ids": List[str],
1031
969
}
1032
970
:return: new Slice object
@@ -1052,14 +990,12 @@ def slice_info(self, slice_id: str) -> dict:
1052
990
1053
991
:param
1054
992
slice_id: id of the slice
1055
- id_type: the type of IDs you want in response (either "reference_id" or "dataset_item_id")
1056
- to identify the DatasetItems
1057
993
1058
994
:return:
1059
995
{
1060
996
"name": str,
1061
997
"dataset_id": str,
1062
- "dataset_item_ids ": List[str],
998
+ "reference_ids": List[str],
1063
999
}
1064
1000
"""
1065
1001
response = self .make_request (
@@ -1111,35 +1047,25 @@ def delete_annotations(
1111
1047
def append_to_slice (
1112
1048
self ,
1113
1049
slice_id : str ,
1114
- dataset_item_ids : List [str ] = None ,
1115
- reference_ids : List [str ] = None ,
1050
+ reference_ids : List [str ],
1116
1051
) -> dict :
1117
1052
"""
1118
1053
Appends to a slice from items already present in a dataset.
1119
1054
The caller must use reference_ids
1120
1055
as a means of identifying items in the dataset.
1121
1056
1122
1057
:param
1123
- dataset_item_ids: List[str],
1124
1058
reference_ids: List[str],
1125
1059
1126
1060
:return:
1127
1061
{
1128
1062
"slice_id": str,
1129
1063
}
1130
1064
"""
1131
- if dataset_item_ids and reference_ids :
1132
- raise Exception (
1133
- "You cannot specify both dataset_item_ids and reference_ids"
1134
- )
1135
1065
1136
- ids_to_append : Dict [str , Any ] = {}
1137
- if dataset_item_ids :
1138
- ids_to_append [DATASET_ITEM_IDS_KEY ] = dataset_item_ids
1139
- if reference_ids :
1140
- ids_to_append [REFERENCE_IDS_KEY ] = reference_ids
1141
-
1142
- response = self .make_request (ids_to_append , f"slice/{ slice_id } /append" )
1066
+ response = self .make_request (
1067
+ {REFERENCE_IDS_KEY : reference_ids }, f"slice/{ slice_id } /append"
1068
+ )
1143
1069
return response
1144
1070
1145
1071
def list_autotags (self , dataset_id : str ) -> List [str ]:
@@ -1192,7 +1118,7 @@ def create_custom_index(
1192
1118
1193
1119
:param
1194
1120
dataset_id: id of dataset that the custom index is being added to.
1195
- embeddings_urls: list of urls, each of which being a json mapping dataset_item_id -> embedding vector
1121
+ embeddings_urls: list of URLs, each of which is a JSON mapping of reference_id -> embedding vector
1196
1122
embedding_dim: the dimension of the embedding vectors, must be consistent for all embedding vectors in the index.
1197
1123
"""
1198
1124
return self .make_request (
0 commit comments