Skip to content

Commit a99859e

Browse files
author
Claire Pajot
committed
Merged
2 parents 29aeec6 + d653ba3 commit a99859e

16 files changed

+323
-224
lines changed

nucleus/__init__.py

Lines changed: 14 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,7 @@
11
"""
22
Nucleus Python Library.
33
4-
Data formats used:
5-
6-
_____________________________________________________________________________________________________
7-
8-
DatasetItem
9-
10-
image_url | str | The URL containing the image for the given row of data.\n
11-
reference_id | str | An optional user-specified identifier to reference this given image.\n
12-
metadata | dict | All of column definitions for this item.
13-
| | The keys should match the user-specified column names,
14-
| | and the corresponding values will populate the cell under the column.\n
15-
_____________________________________________________________________________________________________
16-
17-
18-
Box2DGeometry:
19-
20-
x | float | The distance, in pixels, between the left border of the bounding box
21-
| | and the left border of the image.\n
22-
y | float | The distance, in pixels, between the top border of the bounding box
23-
| | and the top border of the image.\n
24-
width | float | The width in pixels of the annotation.\n
25-
height | float | The height in pixels of the annotation.\n
26-
27-
Box2DAnnotation:
28-
29-
item_id | str | The internally-controlled item identifier to associate this annotation with.
30-
| | The reference_id field should be empty if this field is populated.\n
31-
reference_id | str | The user-specified reference identifier to associate this annotation with.\n
32-
| | The item_id field should be empty if this field is populated.
33-
label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
34-
type | str | The type of this annotation. It should always be the box string literal.\n
35-
geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
36-
metadata | dict | An arbitrary metadata blob for the annotation.\n
37-
38-
_____________________________________________________________________________________________________
39-
40-
Box2DDetection:
41-
42-
item_id | str | The internally-controlled item identifier to associate this annotation with.
43-
| | The reference_id field should be empty if this field is populated.\n
44-
reference_id | str | The user-specified reference identifier to associate this annotation with.
45-
| | The item_id field should be empty if this field is populated.\n
46-
label | str | The label for this annotation (e.g. car, pedestrian, bicycle).\n
47-
type | str | The type of this annotation. It should always be the box string literal.\n
48-
confidence | float | The optional confidence level of this annotation.
49-
| | It should be between 0 and 1 (inclusive).\n
50-
geometry | dict | Representation of the bounding box in the Box2DGeometry format.\n
51-
metadata | dict | An arbitrary metadata blob for the annotation.\n
4+
For full documentation see: https://dashboard.scale.com/nucleus/docs/api?language=python
525
"""
536
import asyncio
547
import json
@@ -83,7 +36,6 @@
8336
ANNOTATIONS_PROCESSED_KEY,
8437
AUTOTAGS_KEY,
8538
DATASET_ID_KEY,
86-
DATASET_ITEM_IDS_KEY,
8739
DEFAULT_NETWORK_TIMEOUT_SEC,
8840
EMBEDDING_DIMENSION_KEY,
8941
EMBEDDINGS_URL_KEY,
@@ -246,11 +198,8 @@ def get_dataset_items(self, dataset_id) -> List[DatasetItem]:
246198
for item in dataset_items:
247199
image_url = item.get("original_image_url")
248200
metadata = item.get("metadata", None)
249-
item_id = item.get("id", None)
250201
ref_id = item.get("ref_id", None)
251-
dataset_item = DatasetItem(
252-
image_url, ref_id, item_id, metadata
253-
)
202+
dataset_item = DatasetItem(image_url, ref_id, metadata)
254203
constructed_dataset_items.append(dataset_item)
255204
elif error:
256205
raise DatasetItemRetrievalError(message=error)
@@ -351,26 +300,19 @@ def delete_dataset(self, dataset_id: str) -> dict:
351300
return self.make_request({}, f"dataset/{dataset_id}", requests.delete)
352301

353302
@sanitize_string_args
354-
def delete_dataset_item(
355-
self, dataset_id: str, item_id: str = None, reference_id: str = None
356-
) -> dict:
303+
def delete_dataset_item(self, dataset_id: str, reference_id) -> dict:
357304
"""
358305
Deletes an item from a dataset, identified by its reference_id.
359306
Returns an empty payload where response status `200` indicates
360307
the dataset item has been successfully deleted.
361308
:param payload: { "name": str }
362309
:return: { "dataset_id": str, "name": str }
363310
"""
364-
if item_id:
365-
return self.make_request(
366-
{}, f"dataset/{dataset_id}/{item_id}", requests.delete
367-
)
368-
else: # Assume reference_id is provided
369-
return self.make_request(
370-
{},
371-
f"dataset/{dataset_id}/refloc/{reference_id}",
372-
requests.delete,
373-
)
311+
return self.make_request(
312+
{},
313+
f"dataset/{dataset_id}/refloc/{reference_id}",
314+
requests.delete,
315+
)
374316

375317
def populate_dataset(
376318
self,
@@ -1018,17 +960,13 @@ def create_slice(self, dataset_id: str, payload: dict) -> Slice:
1018960
as a means of identifying items in the dataset.
1019961
1020962
"name" -- The human-readable name of the slice.
1021-
1022-
"dataset_item_ids" -- An optional list of dataset item ids for the items in the slice
1023-
1024963
"reference_ids" -- An optional list of user-specified identifiers for the items in the slice
1025964
1026965
:param
1027966
dataset_id: id of the dataset
1028967
payload:
1029968
{
1030969
"name": str,
1031-
"dataset_item_ids": List[str],
1032970
"reference_ids": List[str],
1033971
}
1034972
:return: new Slice object
@@ -1054,14 +992,12 @@ def slice_info(self, slice_id: str) -> dict:
1054992
1055993
:param
1056994
slice_id: id of the slice
1057-
id_type: the type of IDs you want in response (either "reference_id" or "dataset_item_id")
1058-
to identify the DatasetItems
1059995
1060996
:return:
1061997
{
1062998
"name": str,
1063999
"dataset_id": str,
1064-
"dataset_item_ids": List[str],
1000+
"reference_ids": List[str],
10651001
}
10661002
"""
10671003
response = self.make_request(
@@ -1113,35 +1049,25 @@ def delete_annotations(
11131049
def append_to_slice(
11141050
self,
11151051
slice_id: str,
1116-
dataset_item_ids: List[str] = None,
1117-
reference_ids: List[str] = None,
1052+
reference_ids: List[str],
11181053
) -> dict:
11191054
"""
11201055
Appends to a slice from items already present in a dataset.
11211056
The caller must use reference_ids
11221057
as a means of identifying items in the dataset.
11231058
11241059
:param
1125-
dataset_item_ids: List[str],
11261060
reference_ids: List[str],
11271061
11281062
:return:
11291063
{
11301064
"slice_id": str,
11311065
}
11321066
"""
1133-
if dataset_item_ids and reference_ids:
1134-
raise Exception(
1135-
"You cannot specify both dataset_item_ids and reference_ids"
1136-
)
11371067

1138-
ids_to_append: Dict[str, Any] = {}
1139-
if dataset_item_ids:
1140-
ids_to_append[DATASET_ITEM_IDS_KEY] = dataset_item_ids
1141-
if reference_ids:
1142-
ids_to_append[REFERENCE_IDS_KEY] = reference_ids
1143-
1144-
response = self.make_request(ids_to_append, f"slice/{slice_id}/append")
1068+
response = self.make_request(
1069+
{REFERENCE_IDS_KEY: reference_ids}, f"slice/{slice_id}/append"
1070+
)
11451071
return response
11461072

11471073
def list_autotags(self, dataset_id: str) -> List[str]:
@@ -1194,7 +1120,7 @@ def create_custom_index(
11941120
11951121
:param
11961122
dataset_id: id of dataset that the custom index is being added to.
1197-
embeddings_urls: list of urls, each of which being a json mapping dataset_item_id -> embedding vector
1123+
embeddings_urls: list of URLs, each of which is a JSON mapping of reference_id -> embedding vector
11981124
embedding_dim: the dimension of the embedding vectors, must be consistent for all embedding vectors in the index.
11991125
"""
12001126
return self.make_request(

nucleus/annotation.py

Lines changed: 17 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,10 @@
1010
BOX_TYPE,
1111
CATEGORY_TYPE,
1212
CUBOID_TYPE,
13-
DATASET_ITEM_ID_KEY,
1413
DIMENSIONS_KEY,
1514
GEOMETRY_KEY,
1615
HEIGHT_KEY,
1716
INDEX_KEY,
18-
ITEM_ID_KEY,
1917
LABEL_KEY,
2018
MASK_TYPE,
2119
MASK_URL_KEY,
@@ -35,16 +33,7 @@
3533

3634

3735
class Annotation:
38-
reference_id: Optional[str] = None
39-
item_id: Optional[str] = None
40-
41-
def _check_ids(self):
42-
if self.reference_id and self.item_id:
43-
self.item_id = None # Prefer reference id to item id.
44-
if not (self.reference_id or self.item_id):
45-
raise Exception(
46-
"You must specify either a reference_id or an item_id for an annotation."
47-
)
36+
reference_id: str
4837

4938
@classmethod
5039
def from_json(cls, payload: dict):
@@ -95,14 +84,12 @@ def to_payload(self) -> dict:
9584
class SegmentationAnnotation(Annotation):
9685
mask_url: str
9786
annotations: List[Segment]
87+
reference_id: str
9888
annotation_id: Optional[str] = None
99-
reference_id: Optional[str] = None
100-
item_id: Optional[str] = None
10189

10290
def __post_init__(self):
10391
if not self.mask_url:
10492
raise Exception("You must specify a mask_url.")
105-
self._check_ids()
10693

10794
@classmethod
10895
def from_json(cls, payload: dict):
@@ -114,8 +101,7 @@ def from_json(cls, payload: dict):
114101
Segment.from_json(ann)
115102
for ann in payload.get(ANNOTATIONS_KEY, [])
116103
],
117-
reference_id=payload.get(REFERENCE_ID_KEY, None),
118-
item_id=payload.get(ITEM_ID_KEY, None),
104+
reference_id=payload[REFERENCE_ID_KEY],
119105
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
120106
)
121107

@@ -126,10 +112,9 @@ def to_payload(self) -> dict:
126112
ANNOTATIONS_KEY: [ann.to_payload() for ann in self.annotations],
127113
ANNOTATION_ID_KEY: self.annotation_id,
128114
}
129-
if self.reference_id:
130-
payload[REFERENCE_ID_KEY] = self.reference_id
131-
else:
132-
payload[ITEM_ID_KEY] = self.item_id
115+
116+
payload[REFERENCE_ID_KEY] = self.reference_id
117+
133118
return payload
134119

135120

@@ -147,14 +132,14 @@ class BoxAnnotation(Annotation): # pylint: disable=R0902
147132
y: Union[float, int]
148133
width: Union[float, int]
149134
height: Union[float, int]
150-
reference_id: Optional[str] = None
151-
item_id: Optional[str] = None
135+
reference_id: str
152136
annotation_id: Optional[str] = None
153137
metadata: Optional[Dict] = None
154138

155139
def __post_init__(self):
156-
self._check_ids()
157140
self.metadata = self.metadata if self.metadata else {}
141+
if self.annotation_id is None:
142+
self.annotation_id = f"{self.label}-{self.x}-{self.y}-{self.width}-{self.height}-{self.reference_id}"
158143

159144
@classmethod
160145
def from_json(cls, payload: dict):
@@ -165,8 +150,7 @@ def from_json(cls, payload: dict):
165150
y=geometry.get(Y_KEY, 0),
166151
width=geometry.get(WIDTH_KEY, 0),
167152
height=geometry.get(HEIGHT_KEY, 0),
168-
reference_id=payload.get(REFERENCE_ID_KEY, None),
169-
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
153+
reference_id=payload[REFERENCE_ID_KEY],
170154
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
171155
metadata=payload.get(METADATA_KEY, {}),
172156
)
@@ -218,13 +202,11 @@ def to_payload(self) -> dict:
218202
class PolygonAnnotation(Annotation):
219203
label: str
220204
vertices: List[Point]
221-
reference_id: Optional[str] = None
222-
item_id: Optional[str] = None
205+
reference_id: str
223206
annotation_id: Optional[str] = None
224207
metadata: Optional[Dict] = None
225208

226209
def __post_init__(self):
227-
self._check_ids()
228210
self.metadata = self.metadata if self.metadata else {}
229211
if len(self.vertices) > 0:
230212
if not hasattr(self.vertices[0], X_KEY) or not hasattr(
@@ -248,8 +230,7 @@ def from_json(cls, payload: dict):
248230
vertices=[
249231
Point.from_json(_) for _ in geometry.get(VERTICES_KEY, [])
250232
],
251-
reference_id=payload.get(REFERENCE_ID_KEY, None),
252-
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
233+
reference_id=payload[REFERENCE_ID_KEY],
253234
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
254235
metadata=payload.get(METADATA_KEY, {}),
255236
)
@@ -274,13 +255,11 @@ class CuboidAnnotation(Annotation): # pylint: disable=R0902
274255
position: Point3D
275256
dimensions: Point3D
276257
yaw: float
277-
reference_id: Optional[str] = None
278-
item_id: Optional[str] = None
258+
reference_id: str
279259
annotation_id: Optional[str] = None
280260
metadata: Optional[Dict] = None
281261

282262
def __post_init__(self):
283-
self._check_ids()
284263
self.metadata = self.metadata if self.metadata else {}
285264

286265
@classmethod
@@ -291,8 +270,7 @@ def from_json(cls, payload: dict):
291270
position=Point3D.from_json(geometry.get(POSITION_KEY, {})),
292271
dimensions=Point3D.from_json(geometry.get(DIMENSIONS_KEY, {})),
293272
yaw=geometry.get(YAW_KEY, 0),
294-
reference_id=payload.get(REFERENCE_ID_KEY, None),
295-
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
273+
reference_id=payload[REFERENCE_ID_KEY],
296274
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
297275
metadata=payload.get(METADATA_KEY, {}),
298276
)
@@ -307,8 +285,7 @@ def to_payload(self) -> dict:
307285
YAW_KEY: self.yaw,
308286
},
309287
}
310-
if self.reference_id:
311-
payload[REFERENCE_ID_KEY] = self.reference_id
288+
payload[REFERENCE_ID_KEY] = self.reference_id
312289
if self.annotation_id:
313290
payload[ANNOTATION_ID_KEY] = self.annotation_id
314291
if self.metadata:
@@ -321,21 +298,18 @@ def to_payload(self) -> dict:
321298
class CategoryAnnotation(Annotation):
322299
label: str
323300
taxonomy_name: str
324-
reference_id: Optional[str] = None
325-
item_id: Optional[str] = None
301+
reference_id: str
326302
metadata: Optional[Dict] = None
327303

328304
def __post_init__(self):
329-
self._check_ids()
330305
self.metadata = self.metadata if self.metadata else {}
331306

332307
@classmethod
333308
def from_json(cls, payload: dict):
334309
return cls(
335310
label=payload.get(LABEL_KEY, 0),
336311
taxonomy_name=payload.get(TAXONOMY_NAME_KEY, None),
337-
reference_id=payload.get(REFERENCE_ID_KEY, None),
338-
item_id=payload.get(DATASET_ITEM_ID_KEY, None),
312+
reference_id=payload[REFERENCE_ID_KEY],
339313
metadata=payload.get(METADATA_KEY, {}),
340314
)
341315

0 commit comments

Comments
 (0)