Skip to content

Commit 8870071

Browse files
author
Ubuntu
committed
Add retries to file requests, cap the maximum batch size, and require async for scene uploads
1 parent 02683eb commit 8870071

File tree

2 files changed

+32
-10
lines changed

2 files changed

+32
-10
lines changed

nucleus/__init__.py

Lines changed: 26 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,7 @@ def populate_dataset(
323323
self,
324324
dataset_id: str,
325325
dataset_items: List[DatasetItem],
326-
batch_size: int = 30,
326+
batch_size: int = 20,
327327
update: bool = False,
328328
):
329329
"""
@@ -494,6 +494,9 @@ async def _make_files_request(
494494
files,
495495
route: str,
496496
session: aiohttp.ClientSession,
497+
retry_attempt=0,
498+
max_retries=3,
499+
sleep_intervals=(1, 3, 9),
497500
):
498501
"""
499502
Makes an async post request with files to a Nucleus endpoint.
@@ -518,6 +521,7 @@ async def _make_files_request(
518521
)
519522

520523
for sleep_time in RetryStrategy.sleep_times + [-1]:
524+
521525
async with session.post(
522526
endpoint,
523527
data=form,
@@ -541,15 +545,27 @@ async def _make_files_request(
541545
continue
542546

543547
if not response.ok:
544-
self.handle_bad_response(
545-
endpoint,
546-
session.post,
547-
aiohttp_response=(
548-
response.status,
549-
response.reason,
550-
data,
551-
),
552-
)
548+
if retry_attempt < max_retries:
549+
time.sleep(sleep_intervals[retry_attempt])
550+
retry_attempt += 1
551+
return self._make_files_request(
552+
files,
553+
route,
554+
session,
555+
retry_attempt,
556+
max_retries,
557+
sleep_intervals,
558+
)
559+
else:
560+
self.handle_bad_response(
561+
endpoint,
562+
session.post,
563+
aiohttp_response=(
564+
response.status,
565+
response.reason,
566+
data,
567+
),
568+
)
553569

554570
return data
555571

nucleus/dataset.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,9 @@ def append(
261261
'ignored_items': int,
262262
}
263263
"""
264+
assert (
265+
batch_size is None or batch_size < 30
266+
), "Please specify a batch size smaller than 30 to avoid timeouts."
264267
dataset_items = [
265268
item for item in items if isinstance(item, DatasetItem)
266269
]
@@ -270,6 +273,9 @@ def append(
270273
"You must append either DatasetItems or Scenes to the dataset."
271274
)
272275
if scenes:
276+
assert (
277+
asynchronous
278+
), "In order to avoid timeouts, you must set asynchronous=True when uploading scenes."
273279
return self.append_scenes(scenes, update, asynchronous)
274280

275281
check_for_duplicate_reference_ids(dataset_items)

0 commit comments

Comments
 (0)