add width/height for dataset item (#409)

jean-lucas · web-flow · commit 4aadb82378cd · 2023-11-16T13:30:01.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,13 +5,23 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-13
+
+## [0.16.8](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.8) - 2023-11-16
 
 ### Added
 
+#### Dataset Item width and height
+- Allow passing width and height to `DatasetItem`
+- Both are _required_ for items appended to a dataset that uses privacy mode
+
+#### Dataset Item Fetch
 - Added `dataset.items_and_annotation_chip_generator()` functionality to generate chips of images in s3 or locally.
 - Added `query` parameter for `dataset.items_and_annotation_generator()` to filter dataset items.
 
+### Removed
+- `upload_to_scale` is no longer a property of `DatasetItem`; users should instead specify `use_privacy_mode` on the dataset during creation
+
+
 ## [0.16.7](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.7) - 2023-11-03
 
 ### Added
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
@@ -483,7 +483,13 @@ def create_dataset(
             },
             "dataset/create",
         )
-        return Dataset(response[DATASET_ID_KEY], self)
+        return Dataset(
+            response[DATASET_ID_KEY],
+            self,
+            name=name,
+            is_scene=is_scene,
+            use_privacy_mode=use_privacy_mode,
+        )
 
     def delete_dataset(self, dataset_id: str) -> dict:
         """
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -43,6 +43,7 @@
     DATASET_IS_SCENE_KEY,
     DATASET_ITEM_IDS_KEY,
     DATASET_ITEMS_KEY,
+    DATASET_PRIVACY_MODE_KEY,
     DEFAULT_ANNOTATION_UPDATE_MODE,
     EMBEDDING_DIMENSION_KEY,
     EMBEDDINGS_URL_KEY,
@@ -75,6 +76,7 @@
     DatasetItem,
     check_all_paths_remote,
     check_for_duplicate_reference_ids,
+    check_items_have_dimensions,
 )
 from .dataset_item_uploader import DatasetItemUploader
 from .deprecation_warning import deprecated
@@ -145,12 +147,20 @@ class Dataset:
         existing_dataset = client.get_dataset("YOUR_DATASET_ID")
     """
 
-    def __init__(self, dataset_id, client: "NucleusClient", name=None):
+    def __init__(
+        self,
+        dataset_id,
+        client: "NucleusClient",
+        name=None,
+        is_scene=None,
+        use_privacy_mode=None,
+    ):
         self.id = dataset_id
         self._client = client
         # NOTE: Optionally set name on creation such that the property access doesn't need to hit the server
         self._name = name
-        self._is_scene = None
+        self._is_scene = is_scene
+        self._use_privacy_mode = use_privacy_mode
 
     def __repr__(self):
         if os.environ.get("NUCLEUS_DEBUG", None):
@@ -184,6 +194,17 @@ def is_scene(self) -> bool:
         self._is_scene = response
         return self._is_scene  # type: ignore
 
+    @property
+    def use_privacy_mode(self) -> bool:
+        """Whether or not the dataset was created for privacy mode."""
+        if self._use_privacy_mode is not None:
+            return self._use_privacy_mode
+        response = self._client.make_request(
+            {}, f"dataset/{self.id}/use_privacy_mode", requests.get
+        )[DATASET_PRIVACY_MODE_KEY]
+        self._use_privacy_mode = response
+        return self._use_privacy_mode  # type: ignore
+
     @property
     def model_runs(self) -> List[str]:
         """List of all model runs associated with the Dataset."""
@@ -656,6 +677,9 @@ def append(
 
         check_for_duplicate_reference_ids(dataset_items)
 
+        if self.use_privacy_mode:
+            check_items_have_dimensions(dataset_items)
+
         if dataset_items and (lidar_scenes or video_scenes):
             raise Exception(
                 "You must append either DatasetItems or Scenes to the dataset."
diff --git a/nucleus/dataset_item.py b/nucleus/dataset_item.py
@@ -12,6 +12,7 @@
     CAMERA_PARAMS_KEY,
     EMBEDDING_INFO_KEY,
     EMBEDDING_VECTOR_KEY,
+    HEIGHT_KEY,
     IMAGE_URL_KEY,
     INDEX_ID_KEY,
     METADATA_KEY,
@@ -20,6 +21,7 @@
     REFERENCE_ID_KEY,
     TYPE_KEY,
     URL_KEY,
+    WIDTH_KEY,
 )
 
 
@@ -120,6 +122,8 @@ class DatasetItem:  # pylint: disable=R0902
     metadata: Optional[dict] = None
     pointcloud_location: Optional[str] = None
     embedding_info: Optional[DatasetItemEmbeddingInfo] = None
+    width: Optional[int] = None
+    height: Optional[int] = None
 
     def __post_init__(self):
         assert self.reference_id != "DUMMY_VALUE", "reference_id is required."
@@ -190,6 +194,12 @@ def to_payload(self, is_scene=False) -> dict:
         if self.embedding_info:
             payload[EMBEDDING_INFO_KEY] = self.embedding_info.to_payload()
 
+        if self.width:
+            payload[WIDTH_KEY] = self.width
+
+        if self.height:
+            payload[HEIGHT_KEY] = self.height
+
         if is_scene:
             if self.image_location:
                 payload[URL_KEY] = self.image_location
@@ -237,3 +247,13 @@ def check_for_duplicate_reference_ids(dataset_items: Sequence[DatasetItem]):
         raise ValueError(
             f"Duplicate reference IDs found among dataset_items: {duplicates}"
         )
+
+
+def check_items_have_dimensions(dataset_items: Sequence[DatasetItem]):
+    for item in dataset_items:
+        has_width = getattr(item, "width")
+        has_height = getattr(item, "height")
+        if not (has_width and has_height):
+            raise Exception(
+                f"When using privacy mode, all items require a width and height. Missing for item: '{item.reference_id}'"
+            )
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"]  # Easy ignore for getting it running
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.16.7"
+version = "0.16.8"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]