Skip to content

Commit a651911

Browse files
authored
Add basic metadata update for scenes/items (#216)
1 parent fd3edb4 commit a651911

File tree

6 files changed

+131
-12
lines changed

6 files changed

+131
-12
lines changed

nucleus/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@
5959
# String-key constants — presumably used as JSON payload/field names in API
# requests and responses; verify against callers. (NOTE(review): this commit
# removes a duplicate `ITEMS_KEY = "items"` assignment that shadowed the one above.)
INDEX_CONTINUOUS_ENABLE_KEY = "enable"
ITEMS_KEY = "items"
ITEM_KEY = "item"
ITEM_METADATA_SCHEMA_KEY = "item_metadata_schema"
JOB_ID_KEY = "job_id"
KEEP_HISTORY_KEY = "keep_history"

nucleus/dataset.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
from .dataset_item_uploader import DatasetItemUploader
6060
from .deprecation_warning import deprecated
6161
from .errors import DatasetItemRetrievalError
62+
from .metadata_manager import ExportMetadataType, MetadataManager
6263
from .payload_constructor import (
6364
construct_append_scenes_payload,
6465
construct_model_run_creation_payload,
@@ -1392,3 +1393,45 @@ def _upload_items(
13921393

13931394
populator = DatasetItemUploader(self.id, self._client)
13941395
return populator.upload(dataset_items, batch_size, update)
1396+
1397+
def update_scene_metadata(self, mapping: Dict[str, dict]):
    """
    Update (merge) scene metadata for each reference_id given in the mapping.
    The backend will join the specified mapping metadata to the existing metadata.
    If there is a key-collision, the value given in the mapping will take precedence.

    Args:
        mapping: key-value pair of <reference_id>: <metadata>

    Examples:
        >>> mapping = {"scene_ref_1": {"new_key": "foo"}, "scene_ref_2": {"some_value": 123}}
        >>> dataset.update_scene_metadata(mapping)

    Returns:
        A dictionary outlining success or failures.
    """
    # Delegate formatting and the API call to the shared helper, pinned to
    # scene-level updates.
    mm = MetadataManager(
        self.id, self._client, mapping, ExportMetadataType.SCENES
    )
    return mm.update()
1417+
1418+
def update_item_metadata(self, mapping: Dict[str, dict]):
    """
    Update (merge) dataset item metadata for each reference_id given in the mapping.
    The backend will join the specified mapping metadata to the existing metadata.
    If there is a key-collision, the value given in the mapping will take precedence.

    Args:
        mapping: key-value pair of <reference_id>: <metadata>

    Examples:
        >>> mapping = {"item_ref_1": {"new_key": "foo"}, "item_ref_2": {"some_value": 123}}
        >>> dataset.update_item_metadata(mapping)

    Returns:
        A dictionary outlining success or failures.
    """
    # Delegate formatting and the API call to the shared helper, pinned to
    # item-level updates.
    mm = MetadataManager(
        self.id, self._client, mapping, ExportMetadataType.DATASET_ITEMS
    )
    return mm.update()

nucleus/metadata_manager.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from enum import Enum
2+
from typing import TYPE_CHECKING, Dict
3+
4+
if TYPE_CHECKING:
5+
from . import NucleusClient
6+
7+
8+
# Wording set to match with backend enum
9+
class ExportMetadataType(Enum):
10+
SCENES = "scene"
11+
DATASET_ITEMS = "item"
12+
13+
14+
class MetadataManager:
15+
"""
16+
Helper class for managing metadata updates on a scene or dataset item.
17+
Do not call directly, use the dataset class methods: `update_scene_metadata` or `update_item_metadata`
18+
"""
19+
20+
def __init__(
21+
self,
22+
dataset_id: str,
23+
client: "NucleusClient",
24+
raw_mappings: Dict[str, dict],
25+
level: ExportMetadataType,
26+
):
27+
self.dataset_id = dataset_id
28+
self._client = client
29+
self.raw_mappings = raw_mappings
30+
self.level = level
31+
32+
self._payload = self._format_mappings()
33+
34+
def _format_mappings(self):
35+
payload = []
36+
for ref_id, meta in self.raw_mappings.items():
37+
payload.append({"reference_id": ref_id, "metadata": meta})
38+
return payload
39+
40+
def update(self):
41+
payload = {"metadata": self._payload, "level": self.level.value}
42+
resp = self._client.make_request(
43+
payload=payload, route=f"dataset/{self.dataset_id}/metadata"
44+
)
45+
return resp

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.6.1"
24+
version = "0.6.2"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_dataset.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
11
import copy
22
import math
3-
import os
43

54
import pytest
65

7-
from nucleus import (
8-
Dataset,
9-
DatasetItem,
10-
NucleusAPIError,
11-
NucleusClient,
12-
UploadResponse,
13-
)
6+
from nucleus import Dataset, DatasetItem, NucleusClient, UploadResponse
147
from nucleus.annotation import (
158
BoxAnnotation,
169
CategoryAnnotation,
@@ -34,8 +27,6 @@
3427
UPDATED_ITEMS,
3528
)
3629
from nucleus.job import AsyncJob, JobError
37-
from nucleus.model import Model
38-
from nucleus.prediction import BoxPrediction
3930

4031
from .helpers import (
4132
LOCAL_FILENAME,
@@ -556,3 +547,22 @@ def sort_labelmap(segmentation_annotation):
556547
exported[0][ANNOTATIONS_KEY][MULTICATEGORY_TYPE][0]
557548
== multicategory_annotation
558549
)
550+
551+
552+
def test_dataset_item_metadata_update(dataset):
    # Seed the dataset with a known set of items.
    dataset.append(make_dataset_items())

    new_metadata = {}
    expected_metadata = {}
    for ds_item in dataset.items:
        extra = {"a_new_key": 123}
        new_metadata[ds_item.reference_id] = extra
        # Merge semantics: new keys are joined onto the existing metadata.
        expected_metadata[ds_item.reference_id] = {**ds_item.metadata, **extra}

    dataset.update_item_metadata(new_metadata)

    actual_metadata = {
        ds_item.reference_id: ds_item.metadata for ds_item in dataset.items
    }

    assert actual_metadata == expected_metadata

tests/test_scene.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,3 +477,25 @@ def test_scene_upload_async_item_dataset(dataset_item):
477477

478478
with pytest.raises(Exception):
479479
dataset_item.append(scenes, update=update, asynchronous=True)
480+
481+
482+
@pytest.mark.integration
def test_scene_metadata_update(dataset_scene):
    payload = TEST_LIDAR_SCENES
    scenes = [
        LidarScene.from_json(scene_json) for scene_json in payload[SCENES_KEY]
    ]
    update = payload[UPDATE_KEY]

    # Scene upload is asynchronous; wait for it to land before updating metadata.
    job = dataset_scene.append(scenes, update=update, asynchronous=True)
    job.sleep_until_complete()

    first_scene = scenes[0]
    additional_metadata = {"some_new_key": 123}
    dataset_scene.update_scene_metadata(
        {first_scene.reference_id: additional_metadata}
    )

    # Merge semantics: the new key is joined onto the scene's existing metadata.
    expected_new_metadata = {**first_scene.metadata, **additional_metadata}

    updated_scene = dataset_scene.get_scene(first_scene.reference_id)
    assert expected_new_metadata == updated_scene.metadata

0 commit comments

Comments
 (0)