Skip to content

Commit 275c6a5

Browse files
authored
Running metadata_update with async flag (#338)
1 parent b6b8432 commit 275c6a5

File tree

3 files changed

+47
-8
lines changed

3 files changed

+47
-8
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1919

2020
### Added
2121
- client.slices to list all of a user's slices independent of dataset
22+
- Added optional parameter `asynchronous: bool` to `Dataset.update_item_metadata` and `Dataset.update_scene_metadata`,
23+
allowing the update to run as a background job when set to `True`
2224

2325
### Fixed
2426
- Validate unit test listing and evaluation history listing. Now uses new bulk fetch endpoints for faster listing.
2527

28+
2629
## [0.14.13](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.13) - 2022-08-10
2730

2831
### Fixed
@@ -37,6 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3740
### Fixed
3841
- Change `{Dataset,Slice}.items_and_annotation_generator` to work with improved paginate endpoint
3942

43+
4044
## [0.14.11](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.11) - 2022-07-20
4145

4246
### Fixed

nucleus/dataset.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,14 +1723,17 @@ def _upload_items(
17231723
local_file_upload_concurrency=local_file_upload_concurrency,
17241724
)
17251725

1726-
def update_scene_metadata(self, mapping: Dict[str, dict]):
1726+
def update_scene_metadata(
1727+
self, mapping: Dict[str, dict], asynchronous: bool = False
1728+
):
17271729
"""
17281730
Update (merge) scene metadata for each reference_id given in the mapping.
17291731
The backend will join the specified mapping metadata to the existing metadata.
17301732
If there is a key-collision, the value given in the mapping will take precedence.
17311733
17321734
Args:
17331735
mapping: key-value pair of <reference_id>: <metadata>
1736+
asynchronous: if True, run the update as a background job
17341737
17351738
Examples:
17361739
>>> mapping = {"scene_ref_1": {"new_key": "foo"}, "scene_ref_2": {"some_value": 123}}
@@ -1740,11 +1743,17 @@ def update_scene_metadata(self, mapping: Dict[str, dict]):
17401743
A dictionary outlining success or failures.
17411744
"""
17421745
mm = MetadataManager(
1743-
self.id, self._client, mapping, ExportMetadataType.SCENES
1746+
self.id,
1747+
self._client,
1748+
mapping,
1749+
ExportMetadataType.SCENES,
1750+
asynchronous,
17441751
)
17451752
return mm.update()
17461753

1747-
def update_item_metadata(self, mapping: Dict[str, dict]):
1754+
def update_item_metadata(
1755+
self, mapping: Dict[str, dict], asynchronous: bool = False
1756+
):
17481757
"""
17491758
Update (merge) dataset item metadata for each reference_id given in the mapping.
17501759
The backend will join the specified mapping metadata to the existing metadata.
@@ -1755,6 +1764,7 @@ def update_item_metadata(self, mapping: Dict[str, dict]):
17551764
17561765
Args:
17571766
mapping: key-value pair of <reference_id>: <metadata>
1767+
asynchronous: if True, run the update as a background job
17581768
17591769
Examples:
17601770
>>> mapping = {"item_ref_1": {"new_key": "foo"}, "item_ref_2": {"some_value": 123, "camera_params": {...}}}
@@ -1764,7 +1774,11 @@ def update_item_metadata(self, mapping: Dict[str, dict]):
17641774
A dictionary outlining success or failures.
17651775
"""
17661776
mm = MetadataManager(
1767-
self.id, self._client, mapping, ExportMetadataType.DATASET_ITEMS
1777+
self.id,
1778+
self._client,
1779+
mapping,
1780+
ExportMetadataType.DATASET_ITEMS,
1781+
asynchronous,
17681782
)
17691783
return mm.update()
17701784

nucleus/metadata_manager.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from .camera_params import CameraParams
55
from .constants import CAMERA_PARAMS_KEY
6+
from .job import AsyncJob
67

78
if TYPE_CHECKING:
89
from . import NucleusClient
@@ -26,11 +27,19 @@ def __init__(
2627
client: "NucleusClient",
2728
raw_mappings: Dict[str, dict],
2829
level: ExportMetadataType,
30+
asynchronous: bool,
2931
):
3032
self.dataset_id = dataset_id
3133
self._client = client
3234
self.raw_mappings = raw_mappings
3335
self.level = level
36+
self.asynchronous = asynchronous
37+
38+
if len(self.raw_mappings) > 500 and not self.asynchronous:
39+
raise Exception(
40+
"Number of items to update is too large to perform it synchronously. "
41+
"Consider running the metadata_update with `asynchronous=True`, to avoid timeouts."
42+
)
3443

3544
self._payload = self._format_mappings()
3645

@@ -55,7 +64,19 @@ def _format_mappings(self):
5564

5665
def update(self):
5766
payload = {"metadata": self._payload, "level": self.level.value}
58-
resp = self._client.make_request(
59-
payload=payload, route=f"dataset/{self.dataset_id}/metadata"
60-
)
61-
return resp
67+
is_async = int(self.asynchronous)
68+
try:
69+
resp = self._client.make_request(
70+
payload=payload,
71+
route=f"dataset/{self.dataset_id}/metadata?async={is_async}",
72+
)
73+
if self.asynchronous:
74+
return AsyncJob.from_json(resp, self._client)
75+
return resp
76+
except Exception as e: # pylint: disable=W0703
77+
print(
78+
"Failed to complete the request. If a timeout occurred, consider running the "
79+
"metadata_update with `asynchronous=True`."
80+
)
81+
print(f"Request failed with:\n\n{e}")
82+
return None

0 commit comments

Comments
 (0)