Skip to content

Commit 3537ba8

Browse files
authored
fix items_and_annotation_generator (#336)
* fix items_and_annotation_generator
* bump semver and changelog
* add inttests
* refactor slice async export inttest
1 parent f55db54 commit 3537ba8

File tree

6 files changed

+74
-11
lines changed

6 files changed

+74
-11
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [0.14.10](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.10) - 2022-07-14
99

10+
### Added
11+
- `Dataset.items_and_annotation_generator()`
12+
13+
### Fixed
14+
- `Slice.items_and_annotation_generator()` bug (items are now iterated via `items_generator()` and looked up by reference ID)
15+
16+
## [0.14.9](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.9) - 2022-07-14
17+
1018
### Fixed
1119
- NoneType errors in Validate
1220

nucleus/dataset.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ def _append_video_scenes(
707707
return response
708708

709709
def iloc(self, i: int) -> dict:
710-
"""Retrieves dataset item by absolute numerical index.
710+
"""Retrieves dataset item and associated annotations by absolute numerical index.
711711
712712
Parameters:
713713
i: Absolute numerical index of the dataset item within the dataset.
@@ -735,7 +735,7 @@ def iloc(self, i: int) -> dict:
735735

736736
@sanitize_string_args
737737
def refloc(self, reference_id: str) -> dict:
738-
"""Retrieves a dataset item by reference ID.
738+
"""Retrieves a dataset item and associated annotations by reference ID.
739739
740740
Parameters:
741741
reference_id: User-defined reference ID of the dataset item.
@@ -762,7 +762,7 @@ def refloc(self, reference_id: str) -> dict:
762762
return format_dataset_item_response(response)
763763

764764
def loc(self, dataset_item_id: str) -> dict:
765-
"""Retrieves a dataset item by Nucleus-generated ID.
765+
"""Retrieves a dataset item and associated annotations by Nucleus-generated ID.
766766
767767
Parameters:
768768
dataset_item_id: Nucleus-generated dataset item ID (starts with ``di_``).
@@ -1178,9 +1178,9 @@ def items_and_annotations(
11781178
"cuboid": Optional[List[CuboidAnnotation]],
11791179
"line": Optional[List[LineAnnotation]],
11801180
"polygon": Optional[List[PolygonAnnotation]],
1181-
"keypoints": Optional[List[KeypointsAnnotation]],
11821181
"segmentation": Optional[List[SegmentationAnnotation]],
11831182
"category": Optional[List[CategoryAnnotation]],
1183+
"keypoints": Optional[List[KeypointsAnnotation]],
11841184
}
11851185
}]
11861186
"""
@@ -1191,6 +1191,32 @@ def items_and_annotations(
11911191
)
11921192
return convert_export_payload(api_payload[EXPORTED_ROWS])
11931193

1194+
def items_and_annotation_generator(
1195+
self,
1196+
) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
1197+
"""Provides a generator of all DatasetItems and Annotations in the dataset.
1198+
1199+
Returns:
1200+
Generator where each element is a dict containing the DatasetItem
1201+
and all of its associated Annotations, grouped by type.
1202+
::
1203+
1204+
Iterable[{
1205+
"item": DatasetItem,
1206+
"annotations": {
1207+
"box": List[BoxAnnotation],
1208+
"polygon": List[PolygonAnnotation],
1209+
"cuboid": List[CuboidAnnotation],
1210+
"line": List[LineAnnotation],
1211+
"segmentation": List[SegmentationAnnotation],
1212+
"category": List[CategoryAnnotation],
1213+
"keypoints": List[KeypointsAnnotation],
1214+
}
1215+
}]
1216+
"""
1217+
for item in self.items_generator():
1218+
yield self.refloc(reference_id=item.reference_id)
1219+
11941220
def export_embeddings(
11951221
self,
11961222
) -> List[Dict[str, Union[str, List[float]]]]:

nucleus/slice.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def items_and_annotation_generator(
185185
186186
Returns:
187187
Generator where each element is a dict containing the DatasetItem
188-
and all of its associated Annotations, grouped by type.
188+
and all of its associated Annotations, grouped by type (e.g. box).
189189
::
190190
191191
Iterable[{
@@ -194,16 +194,18 @@ def items_and_annotation_generator(
194194
"box": List[BoxAnnotation],
195195
"polygon": List[PolygonAnnotation],
196196
"cuboid": List[CuboidAnnotation],
197+
"line": List[LineAnnotation],
197198
"segmentation": List[SegmentationAnnotation],
198199
"category": List[CategoryAnnotation],
200+
"keypoints": List[KeypointsAnnotation],
199201
}
200202
}]
201203
"""
202-
for item_metadata in self.items:
204+
for item in self.items_generator():
203205
yield format_dataset_item_response(
204-
self._client.dataitem_loc(
206+
self._client.dataitem_ref_id(
205207
dataset_id=self.dataset_id,
206-
dataset_item_id=item_metadata["id"],
208+
reference_id=item.reference_id,
207209
)
208210
)
209211

@@ -223,8 +225,10 @@ def items_and_annotations(
223225
"box": List[BoxAnnotation],
224226
"polygon": List[PolygonAnnotation],
225227
"cuboid": List[CuboidAnnotation],
228+
"line": List[LineAnnotation],
226229
"segmentation": List[SegmentationAnnotation],
227230
"category": List[CategoryAnnotation],
231+
"keypoints": List[KeypointsAnnotation],
228232
}
229233
}]
230234
"""

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.14.9"
24+
version = "0.14.10"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_dataset.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,24 @@ def sort_labelmap(segmentation_annotation):
528528
== multicategory_annotation
529529
)
530530

531+
# test generator-based export (items_and_annotation_generator)
532+
for row in dataset.items_and_annotation_generator():
533+
assert row[ITEM_KEY] == ds_items[0]
534+
assert row[ANNOTATIONS_KEY][BOX_TYPE][0] == box_annotation
535+
assert sort_labelmap(
536+
row[ANNOTATIONS_KEY][SEGMENTATION_TYPE][0]
537+
) == sort_labelmap(clear_fields(segmentation_annotation))
538+
assert row[ANNOTATIONS_KEY][POLYGON_TYPE][0] == polygon_annotation
539+
assert row[ANNOTATIONS_KEY][CATEGORY_TYPE][0] == category_annotation
540+
row[ANNOTATIONS_KEY][MULTICATEGORY_TYPE][0].labels = set(
541+
row[ANNOTATIONS_KEY][MULTICATEGORY_TYPE][0].labels
542+
)
543+
multicategory_annotation.labels = set(multicategory_annotation.labels)
544+
assert (
545+
row[ANNOTATIONS_KEY][MULTICATEGORY_TYPE][0]
546+
== multicategory_annotation
547+
)
548+
531549

532550
def test_dataset_item_metadata_update(dataset):
533551
items = make_dataset_items()

tests/test_slice.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,15 @@ def get_expected_item(reference_id):
100100
if item.reference_id == reference_id:
101101
return item
102102

103-
exported = slc.items_and_annotations()
104-
for row in exported:
103+
for row in slc.items_and_annotations():
104+
reference_id = row[ITEM_KEY].reference_id
105+
assert row[ITEM_KEY] == get_expected_item(reference_id)
106+
assert row[ANNOTATIONS_KEY][BOX_TYPE][
107+
0
108+
] == get_expected_box_annotation(reference_id)
109+
110+
# test generator-based export (items_and_annotation_generator)
111+
for row in slc.items_and_annotation_generator():
105112
reference_id = row[ITEM_KEY].reference_id
106113
assert row[ITEM_KEY] == get_expected_item(reference_id)
107114
assert row[ANNOTATIONS_KEY][BOX_TYPE][

0 commit comments

Comments
 (0)