Video Privacy Mode (#294)

cmpajot · web-flow · commit e99dee2d30d5 · 2022-05-06T15:44:46.000-07:00
* initial commits

* fixes

* fixes

* lint

* version and changelog
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,9 +5,20 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.10.6](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.6) - 2022-05-06
+
+### Added
+
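+- Video privacy mode: set `upload_to_scale=False` on a `VideoScene` so the data within the scene is not uploaded to Scale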
+
+### Changed
+
+- Removed the `attachment_type` argument from the video upload API
+
 ## [0.10.5](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.5) - 2022-05-04
 
 ### Fixed
+
 - Invalid polygons are dropped from PolygonMetric iou matching
 
 ## [0.10.4](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.4)) - 2022-05-02
@@ -20,12 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [0.10.3](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.3) - 2022-04-22
 
 ### Fixed
+
 - Polygon and bounding box matching uses Shapely again providing faster evaluations
 - Evaluation function passing fixed for Polygon and Boundingbox configurations
 
 ## [0.10.1](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.1) - 2022-04-21
 
 ### Added
+
 - Added check for payload size
 
 ## [0.10.0](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.0)) - 2022-04-21
diff --git a/nucleus/constants.py b/nucleus/constants.py
@@ -126,7 +126,6 @@
 VIDEO_LOCATION_KEY = "video_location"
 VIDEO_URL_KEY = "video_url"
 VISIBLE_KEY = "visible"
-VIDEO_UPLOAD_TYPE_KEY = "video_upload_type"
 WIDTH_KEY = "width"
 YAW_KEY = "yaw"
 W_KEY = "w"
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -33,6 +33,7 @@
     EMBEDDING_DIMENSION_KEY,
     EMBEDDINGS_URL_KEY,
     EXPORTED_ROWS,
+    FRAME_RATE_KEY,
     ITEMS_KEY,
     KEEP_HISTORY_KEY,
     MESSAGE_KEY,
@@ -41,7 +42,7 @@
     REQUEST_ID_KEY,
     SLICE_ID_KEY,
     UPDATE_KEY,
-    VIDEO_UPLOAD_TYPE_KEY,
+    VIDEO_URL_KEY,
 )
 from .data_transfer_object.dataset_info import DatasetInfo
 from .data_transfer_object.dataset_size import DatasetSize
@@ -1208,7 +1209,7 @@ def get_scene(self, reference_id: str) -> Scene:
             route=f"dataset/{self.id}/scene/{reference_id}",
             requests_command=requests.get,
         )
-        if VIDEO_UPLOAD_TYPE_KEY in response:
+        if FRAME_RATE_KEY in response or VIDEO_URL_KEY in response:
             return VideoScene.from_json(response)
         return LidarScene.from_json(response)
 
diff --git a/nucleus/scene.py b/nucleus/scene.py
@@ -1,7 +1,6 @@
 import json
 from abc import ABC
 from dataclasses import dataclass, field
-from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 
 from nucleus.constants import (
@@ -13,8 +12,8 @@
     NUM_SENSORS_KEY,
     POINTCLOUD_LOCATION_KEY,
     REFERENCE_ID_KEY,
+    UPLOAD_TO_SCALE_KEY,
     VIDEO_LOCATION_KEY,
-    VIDEO_UPLOAD_TYPE_KEY,
     VIDEO_URL_KEY,
 )
 
@@ -414,11 +413,6 @@ def flatten(t):
     return [item for sublist in t for item in sublist]
 
 
-class _VideoUploadType(Enum):
-    IMAGE = "image"
-    VIDEO = "video"
-
-
 @dataclass
 class VideoScene(ABC):
     """Video or sequence of images over time.
@@ -440,29 +434,33 @@ class VideoScene(ABC):
 
     Parameters:
         reference_id (str): User-specified identifier to reference the scene.
-        attachment_type (str): The type of attachments being uploaded as a string literal.
-            If the video is uploaded as an array of frames, the attachment_type is "image".
-            If the video is uploaded as an MP4, the attachment_type is "video".
-        frame_rate (Optional[int]): Required if attachment_type is "image". Frame rate of the video.
-        video_location (Optional[str]): Required if attachment_type is "video". The remote URL
+        frame_rate (Optional[int]): Required if uploading items. Frame rate of the video.
+        video_location (Optional[str]): Required if not uploading items. The remote URL
             containing the video MP4. Remote formats supported include any URL (``http://``
             or ``https://``) or URIs for AWS S3, Azure, or GCS (i.e. ``s3://``, ``gcs://``).
-        items (Optional[List[:class:`DatasetItem`]]): Required if attachment_type is "image".
+        items (Optional[List[:class:`DatasetItem`]]): Required if not uploading video_location.
             List of items representing frames, to be a part of the scene. A scene can be created
             before items have been added to it, but must be non-empty when uploading to
             a :class:`Dataset`. A video scene can contain a maximum of 3000 items.
         metadata (Optional[Dict]): Optional metadata to include with the scene.
+        upload_to_scale (Optional[bool]): Set this to false in order to use
+            `privacy mode <https://nucleus.scale.com/docs/privacy-mode>`_. If using privacy mode
+            you must upload both a video_location and items to the VideoScene.
+
+            Setting this to false means the actual data within the video scene will not be
+            uploaded to Scale, meaning that you can send in links that are only accessible
+            to certain users, and not to Scale.
 
     Refer to our `guide to uploading video data
     <https://nucleus.scale.com/docs/uploading-video-data>`_ for more info!
     """
 
     reference_id: str
-    attachment_type: _VideoUploadType
     frame_rate: Optional[int] = None
     video_location: Optional[str] = None
     items: List[DatasetItem] = field(default_factory=list)
     metadata: Optional[dict] = field(default_factory=dict)
+    upload_to_scale: Optional[bool] = True
 
     def __post_init__(self):
         if self.metadata is None:
@@ -480,25 +478,44 @@ def __eq__(self, other):
 
     @property
     def length(self) -> int:
-        """Gets number of items in the scene for videos uploaded as an array of images."""
+        """Gets number of items in the scene for videos uploaded with an array of images."""
         assert (
-            self.video_location is None
-        ), "Videos uploaded as an mp4 have no length"
+            not self.upload_to_scale or not self.video_location
+        ), "Only videos with items have a length"
         return len(self.items)
 
     def validate(self):
         # TODO: make private
-        assert self.attachment_type in ("image", "video")
-        if self.attachment_type == "image":
+        assert (
+            self.items or self.video_location
+        ), "Please upload either a video_location or an array of dataset items representing frames"
+        if self.upload_to_scale is False:
             assert (
                 self.frame_rate > 0
-            ), "When attachment_type='image' frame rate must be at least 1"
+            ), "When using privacy mode frame rate must be at least 1"
             assert (
                 self.items and self.length > 0
-            ), "When attachment_type='image' scene must have a list of items of length at least 1"
+            ), "When using privacy mode scene must have a list of items of length at least 1"
+            for item in self.items:
+                assert isinstance(
+                    item, DatasetItem
+                ), "Each item in a scene must be a DatasetItem object"
+                assert (
+                    item.image_location is not None
+                ), "Each item in a video scene must have an image_location"
+                assert (
+                    item.upload_to_scale is not False
+                ), "Please specify whether to upload to scale in the VideoScene for videos"
+        elif self.items:
+            assert (
+                self.frame_rate > 0
+            ), "When uploading an array of items frame rate must be at least 1"
+            assert (
+                self.length > 0
+            ), "When uploading an array of items scene must have a list of items of length at least 1"
             assert (
                 not self.video_location
-            ), "No video location is accepted when attachment_type='image'"
+            ), "No video location is accepted when uploading an array of items unless you are using privacy mode"
             for item in self.items:
                 assert isinstance(
                     item, DatasetItem
@@ -508,17 +525,14 @@ def validate(self):
                 ), "Each item in a video scene must have an image_location"
                 assert (
                     item.upload_to_scale is not False
-                ), "Skipping upload to Scale is not currently implemented for videos"
-        if self.attachment_type == "video":
-            assert (
-                self.video_location
-            ), "When attachment_type='video' a video_location is required"
+                ), "Please specify whether to upload to scale in the VideoScene for videos"
+        else:
             assert (
                 not self.frame_rate
-            ), "No frame rate is accepted when attachment_type='video'"
+            ), "No frame rate is accepted when uploading a video_location"
             assert (
                 not self.items
-            ), "No list of items is accepted when attachment_type='video'"
+            ), "No list of items is accepted when uploading a video_location unless you are using privacy mode"
 
     def add_item(
         self, item: DatasetItem, index: int = None, update: bool = False
@@ -532,8 +546,8 @@ def add_item(
               exists. Default is False.
         """
         assert (
-            self.video_location is None
-        ), "Cannot add item to a video uploaded as an mp4"
+            not self.upload_to_scale or not self.video_location
+        ), "Cannot add item to a video without items"
         if index is None:
             index = len(self.items)
         assert (
@@ -553,8 +567,8 @@ def get_item(self, index: int) -> DatasetItem:
         Return:
             :class:`DatasetItem`: DatasetItem at the specified index."""
         assert (
-            self.video_location is None
-        ), "Cannot get item from a video uploaded as an mp4"
+            not self.upload_to_scale or not self.video_location
+        ), "Cannot add item to a video without items"
         if index < 0 or index > len(self.items):
             raise ValueError(
                 f"This scene does not have an item at index {index}"
@@ -568,8 +582,8 @@ def get_items(self) -> List[DatasetItem]:
             List[:class:`DatasetItem`]: List of DatasetItems, sorted by index ascending.
         """
         assert (
-            self.video_location is None
-        ), "Cannot get items from a video uploaded as an mp4"
+            not self.upload_to_scale or not self.video_location
+        ), "Cannot add item to a video without items"
         return self.items
 
     def info(self):
@@ -594,6 +608,8 @@ def info(self):
             payload[VIDEO_URL_KEY] = self.video_location
         if self.items:
             payload[LENGTH_KEY] = self.length
+        if self.upload_to_scale:
+            payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale
 
         return payload
 
@@ -605,18 +621,17 @@ def from_json(cls, payload: dict):
         return cls(
             reference_id=payload[REFERENCE_ID_KEY],
             frame_rate=payload.get(FRAME_RATE_KEY, None),
-            attachment_type=payload[VIDEO_UPLOAD_TYPE_KEY],
             items=items,
             metadata=payload.get(METADATA_KEY, {}),
             video_location=payload.get(VIDEO_URL_KEY, None),
+            upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, True),
         )
 
     def to_payload(self) -> dict:
         """Serializes scene object to schematized JSON dict."""
         self.validate()
         payload: Dict[str, Any] = {
             REFERENCE_ID_KEY: self.reference_id,
-            VIDEO_UPLOAD_TYPE_KEY: self.attachment_type,
         }
         if self.frame_rate:
             payload[FRAME_RATE_KEY] = self.frame_rate
@@ -629,6 +644,8 @@ def to_payload(self) -> dict:
                 item.to_payload(is_scene=True) for item in self.items
             ]
             payload[FRAMES_KEY] = items_payload
+        if self.upload_to_scale is not None:
+            payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale
         return payload
 
     def to_json(self) -> str:
@@ -647,7 +664,7 @@ def check_all_scene_paths_remote(
                     f"All paths for videos must be remote, but {scene.video_location} is either "
                     "local, or a remote URL type that is not supported."
                 )
-        else:
+        if isinstance(scene, LidarScene) or scene.items:
             for item in scene.get_items():
                 pointcloud_location = getattr(item, POINTCLOUD_LOCATION_KEY)
                 if pointcloud_location and is_local_path(pointcloud_location):
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.10.5"
+version = "0.10.6"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
diff --git a/tests/helpers.py b/tests/helpers.py
@@ -41,6 +41,13 @@
 
 TEST_VIDEO_URL = "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/video.mp4"
 
+TEST_INACCESSIBLE_VIDEO_URL = "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/video.mp4/fake-for-privacy-mode"
+
+TEST_INACCESSIBLE_IMG_URLS = [
+    "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/airplane.jpeg/fake-for-privacy-mode",
+    "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/arctichare.jpeg/fake-for-privacy-mode",
+]
+
 TEST_LIDAR_SCENES = {
     "scenes": [
         {
@@ -90,7 +97,6 @@
     "scenes": [
         {
             "reference_id": "scene_1",
-            "video_upload_type": "image",
             "frame_rate": 15,
             "frames": [
                 {
@@ -110,10 +116,30 @@
         },
         {
             "reference_id": "scene_2",
-            "video_upload_type": "video",
             "video_url": TEST_VIDEO_URL,
             "metadata": {"timestamp": "1234", "weather": "rainy"},
         },
+        {
+            "reference_id": "scene_3",
+            "video_url": TEST_INACCESSIBLE_VIDEO_URL,
+            "frame_rate": 15,
+            "frames": [
+                {
+                    "image_url": TEST_INACCESSIBLE_IMG_URLS[0],
+                    "type": "image",
+                    "reference_id": "video_frame_2",
+                    "metadata": {"time": 123, "foo": "bar"},
+                },
+                {
+                    "image_url": TEST_INACCESSIBLE_IMG_URLS[1],
+                    "type": "image",
+                    "reference_id": "video_frame_3",
+                    "metadata": {"time": 124, "foo": "bar_2"},
+                },
+            ],
+            "metadata": {"timestamp": "1234", "weather": "rainy"},
+            "upload_to_scale": False,
+        },
     ],
     "update": False,
 }
@@ -122,7 +148,6 @@
     "scenes": [
         {
             "reference_id": "scene_1",
-            "video_upload_type": "image",
             "frame_rate": 15,
             "frames": [
                 {
@@ -148,7 +173,6 @@
     "scenes": [
         {
             "reference_id": "scene_1",
-            "video_upload_type": "image",
             "frame_rate": 15,
             "frames": [
                 {
@@ -168,13 +192,11 @@
         },
         {
             "reference_id": "scene_2",
-            "video_upload_type": "video",
             "video_url": TEST_IMG_URLS[0],
             "metadata": {"timestamp": "1234", "weather": "rainy"},
         },
         {
             "reference_id": "scene_3",
-            "video_upload_type": "video",
             "video_url": TEST_VIDEO_URL + "nonsense",
             "metadata": {"timestamp": "1234", "weather": "rainy"},
         },
diff --git a/tests/test_scene.py b/tests/test_scene.py