File tree Expand file tree Collapse file tree 2 files changed +32
-10
lines changed Expand file tree Collapse file tree 2 files changed +32
-10
lines changed Original file line number Diff line number Diff line change @@ -323,7 +323,7 @@ def populate_dataset(
323
323
self ,
324
324
dataset_id : str ,
325
325
dataset_items : List [DatasetItem ],
326
- batch_size : int = 30 ,
326
+ batch_size : int = 20 ,
327
327
update : bool = False ,
328
328
):
329
329
"""
@@ -494,6 +494,9 @@ async def _make_files_request(
494
494
files ,
495
495
route : str ,
496
496
session : aiohttp .ClientSession ,
497
+ retry_attempt = 0 ,
498
+ max_retries = 3 ,
499
+ sleep_intervals = (1 , 3 , 9 ),
497
500
):
498
501
"""
499
502
Makes an async post request with files to a Nucleus endpoint.
@@ -518,6 +521,7 @@ async def _make_files_request(
518
521
)
519
522
520
523
for sleep_time in RetryStrategy .sleep_times + [- 1 ]:
524
+
521
525
async with session .post (
522
526
endpoint ,
523
527
data = form ,
@@ -541,15 +545,27 @@ async def _make_files_request(
541
545
continue
542
546
543
547
if not response .ok :
544
- self .handle_bad_response (
545
- endpoint ,
546
- session .post ,
547
- aiohttp_response = (
548
- response .status ,
549
- response .reason ,
550
- data ,
551
- ),
552
- )
548
+ if retry_attempt < max_retries :
549
+ time .sleep (sleep_intervals [retry_attempt ])
550
+ retry_attempt += 1
551
+ return self ._make_files_request (
552
+ files ,
553
+ route ,
554
+ session ,
555
+ retry_attempt ,
556
+ max_retries ,
557
+ sleep_intervals ,
558
+ )
559
+ else :
560
+ self .handle_bad_response (
561
+ endpoint ,
562
+ session .post ,
563
+ aiohttp_response = (
564
+ response .status ,
565
+ response .reason ,
566
+ data ,
567
+ ),
568
+ )
553
569
554
570
return data
555
571
Original file line number Diff line number Diff line change @@ -261,6 +261,9 @@ def append(
261
261
'ignored_items': int,
262
262
}
263
263
"""
264
+ assert (
265
+ batch_size is None or batch_size < 30
266
+ ), "Please specify a batch size smaller than 30 to avoid timeouts."
264
267
dataset_items = [
265
268
item for item in items if isinstance (item , DatasetItem )
266
269
]
@@ -270,6 +273,9 @@ def append(
270
273
"You must append either DatasetItems or Scenes to the dataset."
271
274
)
272
275
if scenes :
276
+ assert (
277
+ asynchronous
278
+ ), "In order to avoid timeouts, you must set asynchronous=True when uploading scenes."
273
279
return self .append_scenes (scenes , update , asynchronous )
274
280
275
281
check_for_duplicate_reference_ids (dataset_items )
You can’t perform that action at this time.
0 commit comments