from .annotation import (
    Annotation,
    check_all_mask_paths_remote,
-    check_all_frame_paths_remote,
)
from .constants import (
    DATASET_ITEM_IDS_KEY,
    DATASET_SLICES_KEY,
    DEFAULT_ANNOTATION_UPDATE_MODE,
    EXPORTED_ROWS,
-    FRAMES,
    NAME_KEY,
    REFERENCE_IDS_KEY,
    REQUEST_ID_KEY,
-    SCENES,
    UPDATE_KEY,
-    URL,
)
from .dataset_item import (
    DatasetItem,
    check_all_paths_remote,
    check_for_duplicate_reference_ids,
)
-from .payload_constructor import construct_model_run_creation_payload
+from .scene import LidarScene, check_all_scene_paths_remote
+from .payload_constructor import (
+    construct_append_scenes_payload,
+    construct_model_run_creation_payload,
+)

WARN_FOR_LARGE_UPLOAD = 50000
+WARN_FOR_LARGE_SCENES_UPLOAD = 5


class Dataset:
@@ -199,16 +200,16 @@ def ingest_tasks(self, task_ids: dict):

    def append(
        self,
-        dataset_items: List[DatasetItem],
+        items: Union[List[DatasetItem], List[LidarScene]],
        update: Optional[bool] = False,
        batch_size: Optional[int] = 20,
        asynchronous=False,
    ) -> Union[dict, AsyncJob]:
        """
-        Appends images with metadata (dataset items) to the dataset. Overwrites images on collision if forced.
+        Appends images with metadata (dataset items) or scenes to the dataset. Overwrites images on collision if update is set to True.

        Parameters:
-        :param dataset_items: items to upload
+        :param items: items to upload
        :param update: if True overwrites images and metadata on collision
        :param batch_size: batch parameter for long uploads
        :param asynchronous: if True, return a job object representing asynchronous ingestion job.
@@ -220,6 +221,17 @@ def append(
            'ignored_items': int,
        }
        """
+        dataset_items = [
+            item for item in items if isinstance(item, DatasetItem)
+        ]
+        scenes = [item for item in items if isinstance(item, LidarScene)]
+        if dataset_items and scenes:
+            raise Exception(
+                "You must append either DatasetItems or Scenes to the dataset."
+            )
+        if scenes:
+            return self.append_scenes(scenes, update, asynchronous)
+
        check_for_duplicate_reference_ids(dataset_items)

        if len(dataset_items) > WARN_FOR_LARGE_UPLOAD and not asynchronous:
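
For context, the new overload dispatches purely on element type: a list of DatasetItems follows the existing item upload path, a list of LidarScenes is routed to append_scenes, and mixing the two raises an Exception. A minimal usage sketch, assuming placeholder credentials, dataset id, and asset paths (scene construction is elided):

    # Usage sketch; the API key, dataset id, and asset paths are placeholders.
    import nucleus

    client = nucleus.NucleusClient("YOUR_API_KEY")
    dataset = client.get_dataset("YOUR_DATASET_ID")

    # Plain image items take the existing DatasetItem path.
    items = [
        nucleus.DatasetItem(image_location="s3://bucket/img0.png", reference_id="img0"),
        nucleus.DatasetItem(image_location="s3://bucket/img1.png", reference_id="img1"),
    ]
    dataset.append(items, update=True)

    # A homogeneous list of LidarScene objects is dispatched to append_scenes();
    # passing DatasetItems and LidarScenes in the same call raises.
    dataset.append(scenes, asynchronous=True)  # scenes: List[LidarScene]
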
@@ -248,39 +260,51 @@ def append(
                batch_size=batch_size,
            )

-    def upload_scenes(
+    def append_scenes(
        self,
-        payload: dict,
+        scenes: List[LidarScene],
        update: Optional[bool] = False,
-        asynchronous: bool = False,
+        asynchronous: Optional[bool] = False,
    ) -> Union[dict, AsyncJob]:
        """
-        Uploads scenes with given frames to the dataset
+        Appends scenes with given frames (containing pointclouds and optional images) to the dataset

        Parameters:
-        :param payload: dictionary containing scenes to be uploaded
+        :param scenes: scenes to upload
        :param update: if True, overwrite scene on collision
-        :param aynchronous: if True, return a job object representing asynchronous ingestion job
+        :param asynchronous: if True, return a job object representing asynchronous ingestion job
        :return:
        {
            'dataset_id': str,
            'new_scenes': int,
+            'ignored_scenes': int,
+            'scenes_errored': int,
+            'errors': List[str],
        }
        """
+        for scene in scenes:
+            scene.validate()
+
+        if len(scenes) > WARN_FOR_LARGE_SCENES_UPLOAD and not asynchronous:
+            print(
+                "Tip: for large uploads, get faster performance by importing your data "
+                "into Nucleus directly from a cloud storage provider. See "
+                "https://dashboard.scale.com/nucleus/docs/api?language=python#guide-for-large-ingestions"
+                " for details."
+            )
+
        if asynchronous:
-            for scene in payload[SCENES]:
-                for frame in scene[FRAMES]:
-                    check_all_frame_paths_remote(frame[URL])
+            check_all_scene_paths_remote(scenes)
            request_id = serialize_and_write_to_presigned_url(
-                [payload], self.id, self._client
+                scenes, self.id, self._client
            )
            response = self._client.make_request(
                payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
                route=f"{self.id}/upload_scenes?async=1",
            )
            return AsyncJob.from_json(response, self._client)

-        # TODO: create client method for sync scene upload
+        payload = construct_append_scenes_payload(scenes, update)
        response = self._client.make_request(
            payload=payload,
            route=f"{self.id}/upload_scenes",
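
When asynchronous=True, append_scenes writes the serialized scenes to a presigned URL and returns an AsyncJob rather than a response dict. A sketch of consuming that job, assuming the AsyncJob polling helpers this client exposes elsewhere (the method names are assumptions, not confirmed by this diff):

    # Asynchronous flow; `dataset` and `scenes` are built as in the sketch above.
    job = dataset.append(scenes, update=False, asynchronous=True)
    job.sleep_until_complete()  # block, polling until ingestion finishes
    print(job.status())

    # The synchronous flow returns the response dict documented in the docstring,
    # including counts of new, ignored, and errored scenes.
    response = dataset.append(scenes, update=True)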