Skip to content

Commit 4aadb82

Browse files
authored
add width/height for dataset item (#409)
1 parent e2713d7 commit 4aadb82

File tree

5 files changed

+65
-5
lines changed

5 files changed

+65
-5
lines changed

CHANGELOG.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,23 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-13
8+
9+
## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-16
910

1011
### Added
1112

13+
#### Dataset Item width and height
14+
- Allow passing width and height to `DatasetItem`
15+
- This is _required_ when using privacy mode
16+
17+
#### Dataset Item Fetch
1218
- Added `dataset.items_and_annotation_chip_generator()` functionality to generate chips of images in S3 or locally.
1319
- Added `query` parameter for `dataset.items_and_annotation_generator()` to filter dataset items.
1420

21+
### Removed
22+
- `upload_to_scale` is no longer a property in `DatasetItem`; users should instead specify `use_privacy_mode` on the dataset during creation.
23+
24+
1525
## [0.16.7](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.7) - 2023-11-03
1626

1727
### Added

nucleus/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,13 @@ def create_dataset(
483483
},
484484
"dataset/create",
485485
)
486-
return Dataset(response[DATASET_ID_KEY], self)
486+
return Dataset(
487+
response[DATASET_ID_KEY],
488+
self,
489+
name=name,
490+
is_scene=is_scene,
491+
use_privacy_mode=use_privacy_mode,
492+
)
487493

488494
def delete_dataset(self, dataset_id: str) -> dict:
489495
"""

nucleus/dataset.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
DATASET_IS_SCENE_KEY,
4444
DATASET_ITEM_IDS_KEY,
4545
DATASET_ITEMS_KEY,
46+
DATASET_PRIVACY_MODE_KEY,
4647
DEFAULT_ANNOTATION_UPDATE_MODE,
4748
EMBEDDING_DIMENSION_KEY,
4849
EMBEDDINGS_URL_KEY,
@@ -75,6 +76,7 @@
7576
DatasetItem,
7677
check_all_paths_remote,
7778
check_for_duplicate_reference_ids,
79+
check_items_have_dimensions,
7880
)
7981
from .dataset_item_uploader import DatasetItemUploader
8082
from .deprecation_warning import deprecated
@@ -145,12 +147,20 @@ class Dataset:
145147
existing_dataset = client.get_dataset("YOUR_DATASET_ID")
146148
"""
147149

148-
def __init__(self, dataset_id, client: "NucleusClient", name=None):
150+
def __init__(
151+
self,
152+
dataset_id,
153+
client: "NucleusClient",
154+
name=None,
155+
is_scene=None,
156+
use_privacy_mode=None,
157+
):
149158
self.id = dataset_id
150159
self._client = client
151160
# NOTE: Optionally set name on creation such that the property access doesn't need to hit the server
152161
self._name = name
153-
self._is_scene = None
162+
self._is_scene = is_scene
163+
self._use_privacy_mode = use_privacy_mode
154164

155165
def __repr__(self):
156166
if os.environ.get("NUCLEUS_DEBUG", None):
@@ -184,6 +194,17 @@ def is_scene(self) -> bool:
184194
self._is_scene = response
185195
return self._is_scene # type: ignore
186196

197+
@property
198+
def use_privacy_mode(self) -> bool:
199+
"""Whether or not the dataset was created for privacy mode."""
200+
if self._use_privacy_mode is not None:
201+
return self._use_privacy_mode
202+
response = self._client.make_request(
203+
{}, f"dataset/{self.id}/use_privacy_mode", requests.get
204+
)[DATASET_PRIVACY_MODE_KEY]
205+
self._use_privacy_mode = response
206+
return self._use_privacy_mode # type: ignore
207+
187208
@property
188209
def model_runs(self) -> List[str]:
189210
"""List of all model runs associated with the Dataset."""
@@ -656,6 +677,9 @@ def append(
656677

657678
check_for_duplicate_reference_ids(dataset_items)
658679

680+
if self.use_privacy_mode:
681+
check_items_have_dimensions(dataset_items)
682+
659683
if dataset_items and (lidar_scenes or video_scenes):
660684
raise Exception(
661685
"You must append either DatasetItems or Scenes to the dataset."

nucleus/dataset_item.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
CAMERA_PARAMS_KEY,
1313
EMBEDDING_INFO_KEY,
1414
EMBEDDING_VECTOR_KEY,
15+
HEIGHT_KEY,
1516
IMAGE_URL_KEY,
1617
INDEX_ID_KEY,
1718
METADATA_KEY,
@@ -20,6 +21,7 @@
2021
REFERENCE_ID_KEY,
2122
TYPE_KEY,
2223
URL_KEY,
24+
WIDTH_KEY,
2325
)
2426

2527

@@ -120,6 +122,8 @@ class DatasetItem: # pylint: disable=R0902
120122
metadata: Optional[dict] = None
121123
pointcloud_location: Optional[str] = None
122124
embedding_info: Optional[DatasetItemEmbeddingInfo] = None
125+
width: Optional[int] = None
126+
height: Optional[int] = None
123127

124128
def __post_init__(self):
125129
assert self.reference_id != "DUMMY_VALUE", "reference_id is required."
@@ -190,6 +194,12 @@ def to_payload(self, is_scene=False) -> dict:
190194
if self.embedding_info:
191195
payload[EMBEDDING_INFO_KEY] = self.embedding_info.to_payload()
192196

197+
if self.width:
198+
payload[WIDTH_KEY] = self.width
199+
200+
if self.height:
201+
payload[HEIGHT_KEY] = self.height
202+
193203
if is_scene:
194204
if self.image_location:
195205
payload[URL_KEY] = self.image_location
@@ -237,3 +247,13 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]):
237247
raise ValueError(
238248
f"Duplicate reference IDs found among dataset_items: {duplicates}"
239249
)
250+
251+
252+
def check_items_have_dimensions(dataset_items: Sequence[DatasetItem]):
253+
for item in dataset_items:
254+
has_width = getattr(item, "width")
255+
has_height = getattr(item, "height")
256+
if not (has_width and has_height):
257+
raise Exception(
258+
f"When using privacy mode, all items require a width and height. Missing for item: '{item.reference_id}'"
259+
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running
2525

2626
[tool.poetry]
2727
name = "scale-nucleus"
28-
version = "0.16.7"
28+
version = "0.16.8"
2929
description = "The official Python client library for Nucleus, the Data Platform for AI"
3030
license = "MIT"
3131
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

0 commit comments

Comments
 (0)