Skip to content

Commit e99dee2

Browse files
authored
Video Privacy Mode (#294)
* initial commits * fixes * fixes * lint * version and changelog
1 parent a4cdaca commit e99dee2

File tree

7 files changed

+104
-52
lines changed

7 files changed

+104
-52
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,20 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.10.6](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.6) - 2022-05-06
9+
10+
### Added
11+
12+
- Video privacy mode
13+
14+
### Changed
15+
16+
- Removed attachment_type argument in video upload API
17+
818
## [0.10.5](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.5) - 2022-05-04
919

1020
### Fixed
21+
1122
- Invalid polygons are dropped from PolygonMetric iou matching
1223

1324
## [0.10.4](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.4) - 2022-05-02
@@ -20,12 +31,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2031
## [0.10.3](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.3) - 2022-04-22
2132

2233
### Fixed
34+
2335
- Polygon and bounding box matching uses Shapely again providing faster evaluations
2436
- Evaluation function passing fixed for Polygon and Boundingbox configurations
2537

2638
## [0.10.1](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.1) - 2022-04-21
2739

2840
### Added
41+
2942
- Added check for payload size
3043

3144
## [0.10.0](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.10.0) - 2022-04-21

nucleus/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@
126126
VIDEO_LOCATION_KEY = "video_location"
127127
VIDEO_URL_KEY = "video_url"
128128
VISIBLE_KEY = "visible"
129-
VIDEO_UPLOAD_TYPE_KEY = "video_upload_type"
130129
WIDTH_KEY = "width"
131130
YAW_KEY = "yaw"
132131
W_KEY = "w"

nucleus/dataset.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
EMBEDDING_DIMENSION_KEY,
3434
EMBEDDINGS_URL_KEY,
3535
EXPORTED_ROWS,
36+
FRAME_RATE_KEY,
3637
ITEMS_KEY,
3738
KEEP_HISTORY_KEY,
3839
MESSAGE_KEY,
@@ -41,7 +42,7 @@
4142
REQUEST_ID_KEY,
4243
SLICE_ID_KEY,
4344
UPDATE_KEY,
44-
VIDEO_UPLOAD_TYPE_KEY,
45+
VIDEO_URL_KEY,
4546
)
4647
from .data_transfer_object.dataset_info import DatasetInfo
4748
from .data_transfer_object.dataset_size import DatasetSize
@@ -1208,7 +1209,7 @@ def get_scene(self, reference_id: str) -> Scene:
12081209
route=f"dataset/{self.id}/scene/{reference_id}",
12091210
requests_command=requests.get,
12101211
)
1211-
if VIDEO_UPLOAD_TYPE_KEY in response:
1212+
if FRAME_RATE_KEY in response or VIDEO_URL_KEY in response:
12121213
return VideoScene.from_json(response)
12131214
return LidarScene.from_json(response)
12141215

nucleus/scene.py

Lines changed: 55 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import json
22
from abc import ABC
33
from dataclasses import dataclass, field
4-
from enum import Enum
54
from typing import Any, Dict, List, Optional, Union
65

76
from nucleus.constants import (
@@ -13,8 +12,8 @@
1312
NUM_SENSORS_KEY,
1413
POINTCLOUD_LOCATION_KEY,
1514
REFERENCE_ID_KEY,
15+
UPLOAD_TO_SCALE_KEY,
1616
VIDEO_LOCATION_KEY,
17-
VIDEO_UPLOAD_TYPE_KEY,
1817
VIDEO_URL_KEY,
1918
)
2019

@@ -414,11 +413,6 @@ def flatten(t):
414413
return [item for sublist in t for item in sublist]
415414

416415

417-
class _VideoUploadType(Enum):
418-
IMAGE = "image"
419-
VIDEO = "video"
420-
421-
422416
@dataclass
423417
class VideoScene(ABC):
424418
"""Video or sequence of images over time.
@@ -440,29 +434,33 @@ class VideoScene(ABC):
440434
441435
Parameters:
442436
reference_id (str): User-specified identifier to reference the scene.
443-
attachment_type (str): The type of attachments being uploaded as a string literal.
444-
If the video is uploaded as an array of frames, the attachment_type is "image".
445-
If the video is uploaded as an MP4, the attachment_type is "video".
446-
frame_rate (Optional[int]): Required if attachment_type is "image". Frame rate of the video.
447-
video_location (Optional[str]): Required if attachment_type is "video". The remote URL
437+
frame_rate (Optional[int]): Required if uploading items. Frame rate of the video.
438+
video_location (Optional[str]): Required if not uploading items. The remote URL
448439
containing the video MP4. Remote formats supported include any URL (``http://``
449440
or ``https://``) or URIs for AWS S3, Azure, or GCS (i.e. ``s3://``, ``gcs://``).
450-
items (Optional[List[:class:`DatasetItem`]]): Required if attachment_type is "image".
441+
items (Optional[List[:class:`DatasetItem`]]): Required if not uploading video_location.
451442
List of items representing frames, to be a part of the scene. A scene can be created
452443
before items have been added to it, but must be non-empty when uploading to
453444
a :class:`Dataset`. A video scene can contain a maximum of 3000 items.
454445
metadata (Optional[Dict]): Optional metadata to include with the scene.
446+
upload_to_scale (Optional[bool]): Set this to false in order to use
447+
`privacy mode <https://nucleus.scale.com/docs/privacy-mode>`_. If using privacy mode
448+
you must upload both a video_location and items to the VideoScene.
449+
450+
Setting this to false means the actual data within the video scene will not be
451+
uploaded to Scale, meaning that you can send in links that are only accessible
452+
to certain users, and not to Scale.
455453
456454
Refer to our `guide to uploading video data
457455
<https://nucleus.scale.com/docs/uploading-video-data>`_ for more info!
458456
"""
459457

460458
reference_id: str
461-
attachment_type: _VideoUploadType
462459
frame_rate: Optional[int] = None
463460
video_location: Optional[str] = None
464461
items: List[DatasetItem] = field(default_factory=list)
465462
metadata: Optional[dict] = field(default_factory=dict)
463+
upload_to_scale: Optional[bool] = True
466464

467465
def __post_init__(self):
468466
if self.metadata is None:
@@ -480,25 +478,44 @@ def __eq__(self, other):
480478

481479
@property
482480
def length(self) -> int:
483-
"""Gets number of items in the scene for videos uploaded as an array of images."""
481+
"""Gets number of items in the scene for videos uploaded with an array of images."""
484482
assert (
485-
self.video_location is None
486-
), "Videos uploaded as an mp4 have no length"
483+
not self.upload_to_scale or not self.video_location
484+
), "Only videos with items have a length"
487485
return len(self.items)
488486

489487
def validate(self):
490488
# TODO: make private
491-
assert self.attachment_type in ("image", "video")
492-
if self.attachment_type == "image":
489+
assert (
490+
self.items or self.video_location
491+
), "Please upload either a video_location or an array of dataset items representing frames"
492+
if self.upload_to_scale is False:
493493
assert (
494494
self.frame_rate > 0
495-
), "When attachment_type='image' frame rate must be at least 1"
495+
), "When using privacy mode frame rate must be at least 1"
496496
assert (
497497
self.items and self.length > 0
498-
), "When attachment_type='image' scene must have a list of items of length at least 1"
498+
), "When using privacy mode scene must have a list of items of length at least 1"
499+
for item in self.items:
500+
assert isinstance(
501+
item, DatasetItem
502+
), "Each item in a scene must be a DatasetItem object"
503+
assert (
504+
item.image_location is not None
505+
), "Each item in a video scene must have an image_location"
506+
assert (
507+
item.upload_to_scale is not False
508+
), "Please specify whether to upload to scale in the VideoScene for videos"
509+
elif self.items:
510+
assert (
511+
self.frame_rate > 0
512+
), "When uploading an array of items frame rate must be at least 1"
513+
assert (
514+
self.length > 0
515+
), "When uploading an array of items scene must have a list of items of length at least 1"
499516
assert (
500517
not self.video_location
501-
), "No video location is accepted when attachment_type='image'"
518+
), "No video location is accepted when uploading an array of items unless you are using privacy mode"
502519
for item in self.items:
503520
assert isinstance(
504521
item, DatasetItem
@@ -508,17 +525,14 @@ def validate(self):
508525
), "Each item in a video scene must have an image_location"
509526
assert (
510527
item.upload_to_scale is not False
511-
), "Skipping upload to Scale is not currently implemented for videos"
512-
if self.attachment_type == "video":
513-
assert (
514-
self.video_location
515-
), "When attachment_type='video' a video_location is required"
528+
), "Please specify whether to upload to scale in the VideoScene for videos"
529+
else:
516530
assert (
517531
not self.frame_rate
518-
), "No frame rate is accepted when attachment_type='video'"
532+
), "No frame rate is accepted when uploading a video_location"
519533
assert (
520534
not self.items
521-
), "No list of items is accepted when attachment_type='video'"
535+
), "No list of items is accepted when uploading a video_location unless you are using privacy mode"
522536

523537
def add_item(
524538
self, item: DatasetItem, index: int = None, update: bool = False
@@ -532,8 +546,8 @@ def add_item(
532546
exists. Default is False.
533547
"""
534548
assert (
535-
self.video_location is None
536-
), "Cannot add item to a video uploaded as an mp4"
549+
not self.upload_to_scale or not self.video_location
550+
), "Cannot add item to a video without items"
537551
if index is None:
538552
index = len(self.items)
539553
assert (
@@ -553,8 +567,8 @@ def get_item(self, index: int) -> DatasetItem:
553567
Return:
554568
:class:`DatasetItem`: DatasetItem at the specified index."""
555569
assert (
556-
self.video_location is None
557-
), "Cannot get item from a video uploaded as an mp4"
570+
not self.upload_to_scale or not self.video_location
571+
), "Cannot add item to a video without items"
558572
if index < 0 or index > len(self.items):
559573
raise ValueError(
560574
f"This scene does not have an item at index {index}"
@@ -568,8 +582,8 @@ def get_items(self) -> List[DatasetItem]:
568582
List[:class:`DatasetItem`]: List of DatasetItems, sorted by index ascending.
569583
"""
570584
assert (
571-
self.video_location is None
572-
), "Cannot get items from a video uploaded as an mp4"
585+
not self.upload_to_scale or not self.video_location
586+
), "Cannot add item to a video without items"
573587
return self.items
574588

575589
def info(self):
@@ -594,6 +608,8 @@ def info(self):
594608
payload[VIDEO_URL_KEY] = self.video_location
595609
if self.items:
596610
payload[LENGTH_KEY] = self.length
611+
if self.upload_to_scale:
612+
payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale
597613

598614
return payload
599615

@@ -605,18 +621,17 @@ def from_json(cls, payload: dict):
605621
return cls(
606622
reference_id=payload[REFERENCE_ID_KEY],
607623
frame_rate=payload.get(FRAME_RATE_KEY, None),
608-
attachment_type=payload[VIDEO_UPLOAD_TYPE_KEY],
609624
items=items,
610625
metadata=payload.get(METADATA_KEY, {}),
611626
video_location=payload.get(VIDEO_URL_KEY, None),
627+
upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, True),
612628
)
613629

614630
def to_payload(self) -> dict:
615631
"""Serializes scene object to schematized JSON dict."""
616632
self.validate()
617633
payload: Dict[str, Any] = {
618634
REFERENCE_ID_KEY: self.reference_id,
619-
VIDEO_UPLOAD_TYPE_KEY: self.attachment_type,
620635
}
621636
if self.frame_rate:
622637
payload[FRAME_RATE_KEY] = self.frame_rate
@@ -629,6 +644,8 @@ def to_payload(self) -> dict:
629644
item.to_payload(is_scene=True) for item in self.items
630645
]
631646
payload[FRAMES_KEY] = items_payload
647+
if self.upload_to_scale is not None:
648+
payload[UPLOAD_TO_SCALE_KEY] = self.upload_to_scale
632649
return payload
633650

634651
def to_json(self) -> str:
@@ -647,7 +664,7 @@ def check_all_scene_paths_remote(
647664
f"All paths for videos must be remote, but {scene.video_location} is either "
648665
"local, or a remote URL type that is not supported."
649666
)
650-
else:
667+
if isinstance(scene, LidarScene) or scene.items:
651668
for item in scene.get_items():
652669
pointcloud_location = getattr(item, POINTCLOUD_LOCATION_KEY)
653670
if pointcloud_location and is_local_path(pointcloud_location):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.10.5"
24+
version = "0.10.6"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/helpers.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@
4141

4242
TEST_VIDEO_URL = "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/video.mp4"
4343

44+
TEST_INACCESSIBLE_VIDEO_URL = "https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/video.mp4/fake-for-privacy-mode"
45+
46+
TEST_INACCESSIBLE_IMG_URLS = [
47+
"https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/airplane.jpeg/fake-for-privacy-mode",
48+
"https://github.com/scaleapi/nucleus-python-client/raw/master/tests/testdata/arctichare.jpeg/fake-for-privacy-mode",
49+
]
50+
4451
TEST_LIDAR_SCENES = {
4552
"scenes": [
4653
{
@@ -90,7 +97,6 @@
9097
"scenes": [
9198
{
9299
"reference_id": "scene_1",
93-
"video_upload_type": "image",
94100
"frame_rate": 15,
95101
"frames": [
96102
{
@@ -110,10 +116,30 @@
110116
},
111117
{
112118
"reference_id": "scene_2",
113-
"video_upload_type": "video",
114119
"video_url": TEST_VIDEO_URL,
115120
"metadata": {"timestamp": "1234", "weather": "rainy"},
116121
},
122+
{
123+
"reference_id": "scene_3",
124+
"video_url": TEST_INACCESSIBLE_VIDEO_URL,
125+
"frame_rate": 15,
126+
"frames": [
127+
{
128+
"image_url": TEST_INACCESSIBLE_IMG_URLS[0],
129+
"type": "image",
130+
"reference_id": "video_frame_2",
131+
"metadata": {"time": 123, "foo": "bar"},
132+
},
133+
{
134+
"image_url": TEST_INACCESSIBLE_IMG_URLS[1],
135+
"type": "image",
136+
"reference_id": "video_frame_3",
137+
"metadata": {"time": 124, "foo": "bar_2"},
138+
},
139+
],
140+
"metadata": {"timestamp": "1234", "weather": "rainy"},
141+
"upload_to_scale": False,
142+
},
117143
],
118144
"update": False,
119145
}
@@ -122,7 +148,6 @@
122148
"scenes": [
123149
{
124150
"reference_id": "scene_1",
125-
"video_upload_type": "image",
126151
"frame_rate": 15,
127152
"frames": [
128153
{
@@ -148,7 +173,6 @@
148173
"scenes": [
149174
{
150175
"reference_id": "scene_1",
151-
"video_upload_type": "image",
152176
"frame_rate": 15,
153177
"frames": [
154178
{
@@ -168,13 +192,11 @@
168192
},
169193
{
170194
"reference_id": "scene_2",
171-
"video_upload_type": "video",
172195
"video_url": TEST_IMG_URLS[0],
173196
"metadata": {"timestamp": "1234", "weather": "rainy"},
174197
},
175198
{
176199
"reference_id": "scene_3",
177-
"video_upload_type": "video",
178200
"video_url": TEST_VIDEO_URL + "nonsense",
179201
"metadata": {"timestamp": "1234", "weather": "rainy"},
180202
},

0 commit comments

Comments
 (0)