
Commit b7c7aaa

Drew Kaul authored and committed
implement append_scenes initial
1 parent 117962f commit b7c7aaa

File tree

6 files changed: 85 additions & 22 deletions


nucleus/constants.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@
 REFERENCE_IDS_KEY = "reference_ids"
 REFERENCE_ID_KEY = "reference_id"
 REQUEST_ID_KEY = "requestId"
-SCENES = "scenes"
+SCENES_KEY = "scenes"
 SEGMENTATIONS_KEY = "segmentations"
 SLICE_ID_KEY = "slice_id"
 STATUS_CODE_KEY = "status_code"

nucleus/dataset.py

Lines changed: 51 additions & 11 deletions
@@ -27,7 +27,7 @@
     NAME_KEY,
     REFERENCE_IDS_KEY,
     REQUEST_ID_KEY,
-    SCENES,
+    SCENES_KEY,
     UPDATE_KEY,
     URL,
 )
@@ -36,7 +36,11 @@
     check_all_paths_remote,
     check_for_duplicate_reference_ids,
 )
-from .payload_constructor import construct_model_run_creation_payload
+from .scene import LidarScene, check_all_scene_paths_remote
+from .payload_constructor import (
+    construct_append_scenes_payload,
+    construct_model_run_creation_payload,
+)
 
 WARN_FOR_LARGE_UPLOAD = 50000
 
@@ -199,16 +203,16 @@ def ingest_tasks(self, task_ids: dict):
 
     def append(
         self,
-        dataset_items: List[DatasetItem],
+        items: Union[List[DatasetItem], List[LidarScene]],
         update: Optional[bool] = False,
         batch_size: Optional[int] = 20,
         asynchronous=False,
     ) -> Union[dict, AsyncJob]:
         """
-        Appends images with metadata (dataset items) to the dataset. Overwrites images on collision if forced.
+        Appends scenes or images with metadata (dataset items) to the dataset. Overwrites images on collision if forced.
 
         Parameters:
-        :param dataset_items: items to upload
+        :param items: items to upload
         :param update: if True overwrites images and metadata on collision
         :param batch_size: batch parameter for long uploads
         :param aynchronous: if True, return a job object representing asynchronous ingestion job.
@@ -220,9 +224,20 @@ def append(
             'ignored_items': int,
         }
         """
-        check_for_duplicate_reference_ids(dataset_items)
+        all_dataset_items = all(
+            (isinstance(item, DatasetItem) for item in items)
+        )
+        all_scenes = all((isinstance(item, LidarScene) for item in items))
+        if not all_dataset_items and not all_scenes:
+            raise Exception(
+                "You must append either DatasetItems or Scenes to the dataset."
+            )
+        if all_scenes:
+            return self.append_scenes(items, update, asynchronous)
+
+        check_for_duplicate_reference_ids(items)
 
-        if len(dataset_items) > WARN_FOR_LARGE_UPLOAD and not asynchronous:
+        if len(items) > WARN_FOR_LARGE_UPLOAD and not asynchronous:
             print(
                 "Tip: for large uploads, get faster performance by importing your data "
                 "into Nucleus directly from a cloud storage provider. See "
@@ -231,9 +246,9 @@ def append(
             )
 
         if asynchronous:
-            check_all_paths_remote(dataset_items)
+            check_all_paths_remote(items)
             request_id = serialize_and_write_to_presigned_url(
-                dataset_items, self.id, self._client
+                items, self.id, self._client
             )
             response = self._client.make_request(
                 payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
@@ -243,11 +258,36 @@ def append(
 
         return self._client.populate_dataset(
             self.id,
-            dataset_items,
+            items,
             update=update,
             batch_size=batch_size,
         )
 
+    def append_scenes(
+        self,
+        scenes: List[LidarScene],
+        update: Optional[bool] = False,
+        asynchronous: Optional[bool] = False,
+    ) -> Union[dict, AsyncJob]:
+        """TODO: Add updated docstring here"""
+        if asynchronous:
+            check_all_scene_paths_remote(scenes)
+            request_id = serialize_and_write_to_presigned_url(
+                scenes, self.id, self._client
+            )
+            response = self._client.make_request(
+                payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
+                route=f"{self.id}/upload_scenes?async=1",
+            )
+            return AsyncJob.from_json(response, self._client)
+
+        payload = construct_append_scenes_payload(scenes, update)
+        response = self._client.make_request(
+            payload=payload,
+            route=f"{self.id}/upload_scenes",
+        )
+        return response
+
     def upload_scenes(
         self,
         payload: dict,
@@ -268,7 +308,7 @@ def upload_scenes(
         }
         """
         if asynchronous:
-            for scene in payload[SCENES]:
+            for scene in payload[SCENES_KEY]:
                 for frame in scene[FRAMES]:
                     check_all_frame_paths_remote(frame[URL])
             request_id = serialize_and_write_to_presigned_url(
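
With this change, append() accepts a homogeneous list of either DatasetItem or LidarScene objects; a mixed list raises, and an all-scene list is forwarded to append_scenes(). A minimal usage sketch follows; the client constructor, dataset ID, URLs, and the build_lidar_scenes() helper are hypothetical placeholders, and LidarScene construction is not shown in this diff:

    from nucleus import NucleusClient, DatasetItem

    client = NucleusClient("YOUR_API_KEY")      # assumed client entry point
    dataset = client.get_dataset("ds_abc123")   # hypothetical dataset ID

    items = [DatasetItem(image_location="s3://bucket/img.jpg", reference_id="img-1")]
    scenes = build_lidar_scenes()               # hypothetical helper returning List[LidarScene]

    dataset.append(items)                       # all-DatasetItem path: existing behavior
    dataset.append(scenes, asynchronous=True)   # all-LidarScene path: dispatches to append_scenes()
    # dataset.append(items + scenes)            # mixed types: raises Exception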

nucleus/payload_constructor.py

Lines changed: 11 additions & 0 deletions
@@ -1,5 +1,6 @@
 from typing import List, Optional, Dict, Union
 from .dataset_item import DatasetItem
+from .scene import LidarScene
 from .annotation import (
     BoxAnnotation,
     CuboidAnnotation,
@@ -19,6 +20,7 @@
     REFERENCE_ID_KEY,
     ANNOTATIONS_KEY,
     ITEMS_KEY,
+    SCENES_KEY,
     UPDATE_KEY,
     MODEL_ID_KEY,
     ANNOTATION_METADATA_SCHEMA_KEY,
@@ -40,6 +42,15 @@ def construct_append_payload(
     )
 
 
+def construct_append_scenes_payload(
+    scene_list: List[LidarScene], update: Optional[bool] = False
+) -> dict:
+    scenes = []
+    for scene in scene_list:
+        scenes.append(scene.to_payload())
+    return {SCENES_KEY: scenes, UPDATE_KEY: update}
+
+
 def construct_annotation_payload(
     annotation_items: List[
         Union[
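
For context, the synchronous path serializes each scene with LidarScene.to_payload() and nests the list under SCENES_KEY. A sketch of the resulting request body; the per-scene dict contents and the value of UPDATE_KEY (presumably "update") are assumptions, since neither appears in this diff:

    from nucleus.payload_constructor import construct_append_scenes_payload

    payload = construct_append_scenes_payload(scene_list, update=False)  # scene_list: List[LidarScene], assumed
    # Resulting shape (sketch):
    # {
    #     "scenes": [<LidarScene.to_payload() dict>, ...],
    #     "update": False,   # assumes UPDATE_KEY == "update"
    # }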

nucleus/scene.py

Lines changed: 16 additions & 1 deletion
@@ -24,7 +24,7 @@
     Z_KEY,
 )
 from .annotation import Point3D
-from .utils import flatten
+from .dataset_item import is_local_path
 
 
 class DatasetItemType(Enum):
@@ -257,3 +257,18 @@ def validate(self):
         assert (
             num_pointclouds == 1
         ), "Each frame of a lidar scene must have exactly 1 pointcloud"
+
+
+def flatten(t):
+    return [item for sublist in t for item in sublist]
+
+
+def check_all_scene_paths_remote(scenes: List[LidarScene]):
+    for scene in scenes:
+        for frame in scene.frames_dict.values():
+            for item in frame.items.values():
+                if is_local_path(getattr(item, URL_KEY)):
+                    raise ValueError(
+                        f"All paths for SceneDatasetItems must be remote, but {item.url} is either "
+                        "local, or a remote URL type that is not supported."
+                    )
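
flatten() is the same list-of-lists helper removed from nucleus/utils.py below, relocated here presumably to break the import cycle that scene.py importing from utils.py would otherwise create now that utils.py imports LidarScene (the motivation is an inference, not stated in the commit). check_all_scene_paths_remote() walks every item of every frame and rejects local paths. A quick sketch:

    from nucleus.scene import flatten, check_all_scene_paths_remote

    assert flatten([[1, 2], [3]]) == [1, 2, 3]

    scenes = []                            # stand-in; real code passes List[LidarScene]
    check_all_scene_paths_remote(scenes)   # raises ValueError if any frame item's URL is local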

nucleus/utils.py

Lines changed: 3 additions & 6 deletions
@@ -29,16 +29,13 @@
 )
 from .dataset_item import DatasetItem
 from .prediction import BoxPrediction, CuboidPrediction, PolygonPrediction
+from .scene import LidarScene
 
 
 def _get_all_field_values(metadata_list: List[dict], key: str):
     return {metadata[key] for metadata in metadata_list if key in metadata}
 
 
-def flatten(t):
-    return [item for sublist in t for item in sublist]
-
-
 def suggest_metadata_schema(
     data: Union[
         List[DatasetItem],
@@ -125,7 +122,7 @@ def convert_export_payload(api_payload):
 
 
 def serialize_and_write(
-    upload_units: Sequence[Union[DatasetItem, Annotation, Dict]], file_pointer
+    upload_units: Sequence[Union[DatasetItem, Annotation, LidarScene]], file_pointer
 ):
     for unit in upload_units:
         try:
@@ -159,7 +156,7 @@ def upload_to_presigned_url(presigned_url: str, file_pointer: IO):
 
 
 def serialize_and_write_to_presigned_url(
-    upload_units: Sequence[Union[DatasetItem, Annotation, Dict]],
+    upload_units: Sequence[Union[DatasetItem, Annotation, LidarScene]],
     dataset_id: str,
     client,
 ):

tests/test_scene.py

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
-from nucleus.constants import SCENES
+from nucleus.constants import SCENES_KEY
 import pytest
 
 from .helpers import (
@@ -27,7 +27,7 @@ def test_scene_upload_sync(dataset):
     response = dataset.upload_scenes(payload)
 
     assert response["dataset_id"] == dataset.id
-    assert response["new_scenes"] == len(TEST_LIDAR_SCENES[SCENES])
+    assert response["new_scenes"] == len(TEST_LIDAR_SCENES[SCENES_KEY])
 
 
 @pytest.mark.integration
@@ -36,7 +36,7 @@ def test_scene_and_cuboid_upload_sync(dataset):
     response = dataset.upload_scenes(payload)
 
     assert response["dataset_id"] == dataset.id
-    assert response["new_scenes"] == len(TEST_LIDAR_SCENES[SCENES])
+    assert response["new_scenes"] == len(TEST_LIDAR_SCENES[SCENES_KEY])
 
     TEST_CUBOID_ANNOTATIONS[0]["dataset_item_id"] = dataset.items[0].item_id
     annotations = [CuboidAnnotation.from_json(TEST_CUBOID_ANNOTATIONS[0])]