Skip to content

Commit e78575b

Browse files
authored
Merge pull request #120 from scaleapi/da-load-test
Load testing + id cleanups
2 parents fb9b452 + 3ccf31e commit e78575b

13 files changed

+258
-217
lines changed

nucleus/__init__.py

Lines changed: 14 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,7 @@
11
"""
22
Nucleus Python Library.
33
4-
Data formats used:
5-
6-
_____________________________________________________________________________________________________
7-
8-
DatasetItem
9-
10-
image_url | str | The URL containing the image for the given row of data.\n
11-
reference_id | str | An optional user-specified identifier to reference this given image.\n
12-
metadata | dict | All of column definitions for this item.
13-
| | The keys should match the user-specified column names,
14-
| | and the corresponding values will populate the cell under the column.\n
15-
_____________________________________________________________________________________________________
16-
17-
18-
Box2DGeometry:
19-
20-
x | float | The distance, in pixels, between the left border of the bounding box
21-
| | and the left border of the image.\n
22-
y | float | The distance, in pixels, between the top border of the bounding box
23-
| | and the top border of the image.\n
24-
width | float | The width in pixels of the annotation.\n
25-
height | float | The height in pixels of the annotation.\n
26-
27-
Box2DAnnotation:
28-
29-
item_id | str | The internally-controlled item identifier to associate this annotation with.
30-
| | The reference_id field should be empty if this field is populated.\n
31-
reference_id | str | The user-specified reference identifier to associate this annotation with.\n
32-
| | The item_id field should be empty if this field is populated.
33-
label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
34-
type | str | The type of this annotation. It should always be the box string literal.\n
35-
geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
36-
metadata | dict | An arbitrary metadata blob for the annotation.\n
37-
38-
_____________________________________________________________________________________________________
39-
40-
Box2DDetection:
41-
42-
item_id | str | The internally-controlled item identifier to associate this annotation with.
43-
| | The reference_id field should be empty if this field is populated.\n
44-
reference_id | str | The user-specified reference identifier to associate this annotation with.
45-
| | The item_id field should be empty if this field is populated.\n
46-
label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
47-
type | str | The type of this annotation. It should always be the box string literal.\n
48-
confidence | float | The optional confidence level of this annotation.
49-
| | It should be between 0 and 1 (inclusive).\n
50-
geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
51-
metadata | dict | An arbitrary metadata blob for the annotation.\n
4+
For full documentation see: https://dashboard.scale.com/nucleus/docs/api?language=python
525
"""
536
import asyncio
547
import json
@@ -82,7 +35,6 @@
8235
ANNOTATIONS_PROCESSED_KEY,
8336
AUTOTAGS_KEY,
8437
DATASET_ID_KEY,
85-
DATASET_ITEM_IDS_KEY,
8638
DEFAULT_NETWORK_TIMEOUT_SEC,
8739
EMBEDDING_DIMENSION_KEY,
8840
EMBEDDINGS_URL_KEY,
@@ -245,11 +197,8 @@ def get_dataset_items(self, dataset_id) -> List[DatasetItem]:
245197
for item in dataset_items:
246198
image_url = item.get("original_image_url")
247199
metadata = item.get("metadata", None)
248-
item_id = item.get("id", None)
249200
ref_id = item.get("ref_id", None)
250-
dataset_item = DatasetItem(
251-
image_url, ref_id, item_id, metadata
252-
)
201+
dataset_item = DatasetItem(image_url, ref_id, metadata)
253202
constructed_dataset_items.append(dataset_item)
254203
elif error:
255204
raise DatasetItemRetrievalError(message=error)
@@ -350,26 +299,19 @@ def delete_dataset(self, dataset_id: str) -> dict:
350299
return self.make_request({}, f"dataset/{dataset_id}", requests.delete)
351300

352301
@sanitize_string_args
353-
def delete_dataset_item(
354-
self, dataset_id: str, item_id: str = None, reference_id: str = None
355-
) -> dict:
302+
def delete_dataset_item(self, dataset_id: str, reference_id) -> dict:
356303
"""
357304
Deletes a single item from a dataset, identified by its reference_id.
358305
Returns an empty payload where response status `200` indicates
359306
the dataset item has been successfully deleted.
360307
:param dataset_id: id of the dataset; reference_id: user-specified reference id of the item to delete
361308
:return: the response payload of the delete request (empty on success)
362309
"""
363-
if item_id:
364-
return self.make_request(
365-
{}, f"dataset/{dataset_id}/{item_id}", requests.delete
366-
)
367-
else: # Assume reference_id is provided
368-
return self.make_request(
369-
{},
370-
f"dataset/{dataset_id}/refloc/{reference_id}",
371-
requests.delete,
372-
)
310+
return self.make_request(
311+
{},
312+
f"dataset/{dataset_id}/refloc/{reference_id}",
313+
requests.delete,
314+
)
373315

374316
def populate_dataset(
375317
self,
@@ -1016,17 +958,13 @@ def create_slice(self, dataset_id: str, payload: dict) -> Slice:
1016958
as a means of identifying items in the dataset.
1017959
1018960
"name" -- The human-readable name of the slice.
1019-
1020-
"dataset_item_ids" -- An optional list of dataset item ids for the items in the slice
1021-
1022961
"reference_ids" -- An optional list of user-specified identifiers for the items in the slice
1023962
1024963
:param
1025964
dataset_id: id of the dataset
1026965
payload:
1027966
{
1028967
"name": str,
1029-
"dataset_item_ids": List[str],
1030968
"reference_ids": List[str],
1031969
}
1032970
:return: new Slice object
@@ -1052,14 +990,12 @@ def slice_info(self, slice_id: str) -> dict:
1052990
1053991
:param
1054992
slice_id: id of the slice
1055-
id_type: the type of IDs you want in response (either "reference_id" or "dataset_item_id")
1056-
to identify the DatasetItems
1057993
1058994
:return:
1059995
{
1060996
"name": str,
1061997
"dataset_id": str,
1062-
"dataset_item_ids": List[str],
998+
"reference_ids": List[str],
1063999
}
10641000
"""
10651001
response = self.make_request(
@@ -1111,35 +1047,25 @@ def delete_annotations(
11111047
def append_to_slice(
11121048
self,
11131049
slice_id: str,
1114-
dataset_item_ids: List[str] = None,
1115-
reference_ids: List[str] = None,
1050+
reference_ids: List[str],
11161051
) -> dict:
11171052
"""
11181053
Appends to a slice from items already present in a dataset.
11191054
Items are identified exclusively by their reference_ids; this is the only
11201055
supported means of identifying items in the dataset.
11211056
11221057
:param
1123-
dataset_item_ids: List[str],
11241058
reference_ids: List[str],
11251059
11261060
:return:
11271061
{
11281062
"slice_id": str,
11291063
}
11301064
"""
1131-
if dataset_item_ids and reference_ids:
1132-
raise Exception(
1133-
"You cannot specify both dataset_item_ids and reference_ids"
1134-
)
11351065

1136-
ids_to_append: Dict[str, Any] = {}
1137-
if dataset_item_ids:
1138-
ids_to_append[DATASET_ITEM_IDS_KEY] = dataset_item_ids
1139-
if reference_ids:
1140-
ids_to_append[REFERENCE_IDS_KEY] = reference_ids
1141-
1142-
response = self.make_request(ids_to_append, f"slice/{slice_id}/append")
1066+
response = self.make_request(
1067+
{REFERENCE_IDS_KEY: reference_ids}, f"slice/{slice_id}/append"
1068+
)
11431069
return response
11441070

11451071
def list_autotags(self, dataset_id: str) -> List[str]:
@@ -1192,7 +1118,7 @@ def create_custom_index(
11921118
11931119
:param
11941120
dataset_id: id of dataset that the custom index is being added to.
1195-
embeddings_urls: list of urls, each of which being a json mapping dataset_item_id -> embedding vector
1121+
embeddings_urls: list of urls, each of which points to a json file mapping reference_id -> embedding vector
11961122
embedding_dim: the dimension of the embedding vectors, must be consistent for all embedding vectors in the index.
11971123
"""
11981124
return self.make_request(

nucleus/annotation.py

Lines changed: 15 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@
99
ANNOTATIONS_KEY,
1010
BOX_TYPE,
1111
CUBOID_TYPE,
12-
DATASET_ITEM_ID_KEY,
1312
DIMENSIONS_KEY,
1413
GEOMETRY_KEY,
1514
HEIGHT_KEY,
1615
INDEX_KEY,
17-
ITEM_ID_KEY,
1816
LABEL_KEY,
1917
MASK_TYPE,
2018
MASK_URL_KEY,
@@ -33,16 +31,7 @@
3331

3432

3533
class Annotation:
36-
reference_id: Optional[str] = None
37-
item_id: Optional[str] = None
38-
39-
def _check_ids(self):
40-
if self.reference_id and self.item_id:
41-
self.item_id = None # Prefer reference id to item id.
42-
if not (self.reference_id or self.item_id):
43-
raise Exception(
44-
"You must specify either a reference_id or an item_id for an annotation."
45-
)
34+
reference_id: str
4635

4736
@classmethod
4837
def from_json(cls, payload: dict):
@@ -93,14 +82,12 @@ def to_payload(self) -> dict:
9382
class SegmentationAnnotation(Annotation):
9483
mask_url: str
9584
annotations: List[Segment]
85+
reference_id: str
9686
annotation_id: Optional[str] = None
97-
reference_id: Optional[str] = None
98-
item_id: Optional[str] = None
9987

10088
def __post_init__(self):
10189
if not self.mask_url:
10290
raise Exception("You must specify a mask_url.")
103-
self._check_ids()
10491

10592
@classmethod
10693
def from_json(cls, payload: dict):
@@ -112,8 +99,7 @@ def from_json(cls, payload: dict):
11299
Segment.from_json(ann)
113100
for ann in payload.get(ANNOTATIONS_KEY, [])
114101
],
115-
reference_id=payload.get(REFERENCE_ID_KEY, None),
116-
item_id=payload.get(ITEM_ID_KEY, None),
102+
reference_id=payload[REFERENCE_ID_KEY],
117103
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
118104
)
119105

@@ -124,10 +110,9 @@ def to_payload(self) -> dict:
124110
ANNOTATIONS_KEY: [ann.to_payload() for ann in self.annotations],
125111
ANNOTATION_ID_KEY: self.annotation_id,
126112
}
127-
if self.reference_id:
128-
payload[REFERENCE_ID_KEY] = self.reference_id
129-
else:
130-
payload[ITEM_ID_KEY] = self.item_id
113+
114+
payload[REFERENCE_ID_KEY] = self.reference_id
115+
131116
return payload
132117

133118

@@ -144,14 +129,14 @@ class BoxAnnotation(Annotation): # pylint: disable=R0902
144129
y: Union[float, int]
145130
width: Union[float, int]
146131
height: Union[float, int]
147-
reference_id: Optional[str] = None
148-
item_id: Optional[str] = None
132+
reference_id: str
149133
annotation_id: Optional[str] = None
150134
metadata: Optional[Dict] = None
151135

152136
def __post_init__(self):
153-
self._check_ids()
154137
self.metadata = self.metadata if self.metadata else {}
138+
if self.annotation_id is None:
139+
self.annotation_id = f"{self.label}-{self.x}-{self.y}-{self.width}-{self.height}-{self.reference_id}"
155140

156141
@classmethod
157142
def from_json(cls, payload: dict):
@@ -162,8 +147,7 @@ def from_json(cls, payload: dict):
162147
y=geometry.get(Y_KEY, 0),
163148
width=geometry.get(WIDTH_KEY, 0),
164149
height=geometry.get(HEIGHT_KEY, 0),
165-
reference_id=payload.get(REFERENCE_ID_KEY, None),
166-
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
150+
reference_id=payload[REFERENCE_ID_KEY],
167151
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
168152
metadata=payload.get(METADATA_KEY, {}),
169153
)
@@ -215,13 +199,11 @@ def to_payload(self) -> dict:
215199
class PolygonAnnotation(Annotation):
216200
label: str
217201
vertices: List[Point]
218-
reference_id: Optional[str] = None
219-
item_id: Optional[str] = None
202+
reference_id: str
220203
annotation_id: Optional[str] = None
221204
metadata: Optional[Dict] = None
222205

223206
def __post_init__(self):
224-
self._check_ids()
225207
self.metadata = self.metadata if self.metadata else {}
226208
if len(self.vertices) > 0:
227209
if not hasattr(self.vertices[0], X_KEY) or not hasattr(
@@ -245,8 +227,7 @@ def from_json(cls, payload: dict):
245227
vertices=[
246228
Point.from_json(_) for _ in geometry.get(VERTICES_KEY, [])
247229
],
248-
reference_id=payload.get(REFERENCE_ID_KEY, None),
249-
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
230+
reference_id=payload[REFERENCE_ID_KEY],
250231
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
251232
metadata=payload.get(METADATA_KEY, {}),
252233
)
@@ -271,13 +252,11 @@ class CuboidAnnotation(Annotation): # pylint: disable=R0902
271252
position: Point3D
272253
dimensions: Point3D
273254
yaw: float
274-
reference_id: Optional[str] = None
275-
item_id: Optional[str] = None
255+
reference_id: str
276256
annotation_id: Optional[str] = None
277257
metadata: Optional[Dict] = None
278258

279259
def __post_init__(self):
280-
self._check_ids()
281260
self.metadata = self.metadata if self.metadata else {}
282261

283262
@classmethod
@@ -288,8 +267,7 @@ def from_json(cls, payload: dict):
288267
position=Point3D.from_json(geometry.get(POSITION_KEY, {})),
289268
dimensions=Point3D.from_json(geometry.get(DIMENSIONS_KEY, {})),
290269
yaw=geometry.get(YAW_KEY, 0),
291-
reference_id=payload.get(REFERENCE_ID_KEY, None),
292-
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
270+
reference_id=payload[REFERENCE_ID_KEY],
293271
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
294272
metadata=payload.get(METADATA_KEY, {}),
295273
)
@@ -304,8 +282,7 @@ def to_payload(self) -> dict:
304282
YAW_KEY: self.yaw,
305283
},
306284
}
307-
if self.reference_id:
308-
payload[REFERENCE_ID_KEY] = self.reference_id
285+
payload[REFERENCE_ID_KEY] = self.reference_id
309286
if self.annotation_id:
310287
payload[ANNOTATION_ID_KEY] = self.annotation_id
311288
if self.metadata:

nucleus/constants.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
CX_KEY = "cx"
1919
CY_KEY = "cy"
2020
DATASET_ID_KEY = "dataset_id"
21-
DATASET_ITEM_IDS_KEY = "dataset_item_ids"
22-
DATASET_ITEM_ID_KEY = "dataset_item_id"
2321
DATASET_LENGTH_KEY = "length"
2422
DATASET_MODEL_RUNS_KEY = "model_run_ids"
2523
DATASET_NAME_KEY = "name"
@@ -46,7 +44,6 @@
4644
INDEX_KEY = "index"
4745
INDEX_CONTINUOUS_ENABLE_KEY = "enable"
4846
ITEMS_KEY = "items"
49-
ITEM_ID_KEY = "item_id"
5047
ITEM_KEY = "item"
5148
ITEMS_KEY = "items"
5249
ITEM_METADATA_SCHEMA_KEY = "item_metadata_schema"

0 commit comments

Comments
 (0)