Commit ed0a796

Changes for new video schema (#239)
* Switch the dataset item type
* Comment removal
* fixes
* remove video frame location
* Fix partial equality issues for test
* remote check
* Changes for deploy
* typo
* Remove unused import
1 parent 8c5f079 commit ed0a796

8 files changed (+40, -69 lines)

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -4,6 +4,12 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.7](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.6.7) - 2021-03-08
+
+### Added
+- `get_autotag_refinement_metrics`
+- Get model using `model_run_id`
+- Video API change to require `image_location` instead of `video_frame_location` in `DatasetItems`
 ## [0.6.6](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.6.6) - 2021-02-18
 
 ### Added
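The last new entry is the behavioral change in this commit: a video frame is now declared as an ordinary image `DatasetItem`. A minimal sketch of the new call, with a placeholder bucket URL and reference_id:

from nucleus import DatasetItem

# After 0.6.7 a video frame is just a regular image item; the URL below is a
# placeholder for any remote frame image.
frame = DatasetItem(
    image_location="s3://your-bucket/frames/frame_0.jpg",
    reference_id="video_frame_0",
    metadata={"time": 123},
)

Before this release the same frame would have been created with the now-removed video_frame_location parameter.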

nucleus/constants.py

Lines changed: 0 additions & 3 deletions
@@ -102,9 +102,6 @@
 UPLOAD_TO_SCALE_KEY = "upload_to_scale"
 URL_KEY = "url"
 VERTICES_KEY = "vertices"
-VIDEO_FRAME_LOCATION_KEY = "video_frame_location"
-VIDEO_FRAME_URL_KEY = "video_frame_url"
-VIDEO_KEY = "video"
 VIDEO_UPLOAD_TYPE_KEY = "video_upload_type"
 WIDTH_KEY = "width"
 YAW_KEY = "yaw"

nucleus/dataset.py

Lines changed: 1 addition & 1 deletion
@@ -641,7 +641,7 @@ def _append_video_scenes(
         )
 
         if asynchronous:
-            # TODO check_all_scene_paths_remote(scenes)
+            check_all_scene_paths_remote(scenes)
             request_id = serialize_and_write_to_presigned_url(
                 scenes, self.id, self._client
             )
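With the TODO removed, `_append_video_scenes` now runs `check_all_scene_paths_remote(scenes)` before queuing an asynchronous upload, so local frame paths fail fast instead of reaching the server. A rough usage sketch, assuming `dataset` is an existing Dataset and `scenes` is a list of video scenes built from remote URLs:

# Hypothetical call site: the client now validates that every frame location is
# remote before serializing the request to the presigned URL.
job = dataset.append(scenes, asynchronous=True)  # raises ValueError on any local path
job.sleep_until_complete()
print(job.status())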

nucleus/dataset_item.py

Lines changed: 16 additions & 39 deletions
@@ -22,7 +22,6 @@
     TYPE_KEY,
     UPLOAD_TO_SCALE_KEY,
     URL_KEY,
-    VIDEO_FRAME_URL_KEY,
     W_KEY,
     X_KEY,
     Y_KEY,
@@ -121,35 +120,26 @@ def to_payload(self) -> dict:
 class DatasetItemType(Enum):
     IMAGE = "image"
     POINTCLOUD = "pointcloud"
-    VIDEO = "video"
 
 
 @dataclass  # pylint: disable=R0902
 class DatasetItem:  # pylint: disable=R0902
-    """A dataset item is an image, pointcloud or video frame that has associated metadata.
+    """A dataset item is an image or pointcloud that has associated metadata.
 
     Note: for 3D data, please include a :class:`CameraParams` object under a key named
     "camera_params" within the metadata dictionary. This will allow for projecting
     3D annotations to any image within a scene.
 
     Args:
-        image_location (Optional[str]): Required if pointcloud_location and
-            video_frame_location are not present: The location containing the image for
-            the given row of data. This can be a local path, or a remote URL. Remote
-            formats supported include any URL (``http://`` or ``https://``) or URIs for
-            AWS S3, Azure, or GCS (i.e. ``s3://``, ``gcs://``).
-
-        pointcloud_location (Optional[str]): Required if image_location and
-            video_frame_location are not present: The remote URL containing the
-            pointcloud JSON. Remote formats supported include any URL (``http://``
-            or ``https://``) or URIs for AWS S3, Azure, or GCS (i.e. ``s3://``,
-            ``gcs://``).
-
-        video_frame_location (Optional[str]): Required if image_location and
-            pointcloud_location are not present: The remote URL containing the
-            video frame image. Remote formats supported include any URL (``http://``
-            or ``https://``) or URIs for AWS S3, Azure, or GCS (i.e. ``s3://``,
-            ``gcs://``).
+        image_location (Optional[str]): Required if pointcloud_location is not present:
+            The location containing the image for the given row of data. This can be a local
+            path, or a remote URL. Remote formats supported include any URL (``http://`` or
+            ``https://``) or URIs for AWS S3, Azure, or GCS (i.e. ``s3://``, ``gcs://``).
+
+        pointcloud_location (Optional[str]): Required if image_location is not present:
+            The remote URL containing the pointcloud JSON. Remote formats supported include
+            any URL (``http://`` or ``https://``) or URIs for AWS S3, Azure, or GCS (i.e.
+            ``s3://``, ``gcs://``).
 
         reference_id (Optional[str]): A user-specified identifier to reference the
             item.
@@ -212,33 +202,23 @@ class DatasetItem:  # pylint: disable=R0902
     metadata: Optional[dict] = None
     pointcloud_location: Optional[str] = None
     upload_to_scale: Optional[bool] = True
-    video_frame_location: Optional[str] = None
 
     def __post_init__(self):
         assert self.reference_id != "DUMMY_VALUE", "reference_id is required."
-        assert (
-            bool(self.image_location)
-            + bool(self.pointcloud_location)
-            + bool(self.video_frame_location)
-            == 1
-        ), "Must specify exactly one of the image_location, pointcloud_location, video_frame_location parameters"
-        if (
-            self.pointcloud_location or self.video_frame_location
-        ) and not self.upload_to_scale:
+        assert bool(self.image_location) != bool(
+            self.pointcloud_location
+        ), "Must specify exactly one of the image_location or pointcloud_location parameters"
+        if (self.pointcloud_location) and not self.upload_to_scale:
             raise NotImplementedError(
-                "Skipping upload to Scale is not currently implemented for pointclouds and videos."
+                "Skipping upload to Scale is not currently implemented for pointclouds."
             )
         self.local = (
             is_local_path(self.image_location) if self.image_location else None
         )
         self.type = (
             DatasetItemType.IMAGE
             if self.image_location
-            else (
-                DatasetItemType.POINTCLOUD
-                if self.pointcloud_location
-                else DatasetItemType.VIDEO
-            )
+            else DatasetItemType.POINTCLOUD
         )
         camera_params = (
             self.metadata.get(CAMERA_PARAMS_KEY, None)
@@ -258,7 +238,6 @@ def from_json(cls, payload: dict):
         return cls(
             image_location=image_url,
             pointcloud_location=payload.get(POINTCLOUD_URL_KEY, None),
-            video_frame_location=payload.get(VIDEO_FRAME_URL_KEY, None),
             reference_id=payload.get(REFERENCE_ID_KEY, None),
             metadata=payload.get(METADATA_KEY, {}),
             upload_to_scale=payload.get(UPLOAD_TO_SCALE_KEY, True),
@@ -281,8 +260,6 @@ def to_payload(self, is_scene=False) -> dict:
             payload[URL_KEY] = self.image_location
         elif self.pointcloud_location:
             payload[URL_KEY] = self.pointcloud_location
-        elif self.video_frame_location:
-            payload[URL_KEY] = self.video_frame_location
         payload[TYPE_KEY] = self.type.value
         if self.camera_params:
             payload[CAMERA_PARAMS_KEY] = self.camera_params.to_payload()
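The tightened `__post_init__` collapses the old three-way check into an exclusive-or over `image_location` and `pointcloud_location`. A small sketch of what now passes and fails, with placeholder URLs:

from nucleus import DatasetItem

# Valid: exactly one location is set.
item = DatasetItem(
    image_location="https://example.com/img_0.jpg",
    reference_id="img_0",
)

# Invalid: setting both locations trips the new assertion
# "Must specify exactly one of the image_location or pointcloud_location parameters".
try:
    DatasetItem(
        image_location="https://example.com/img_1.jpg",
        pointcloud_location="s3://your-bucket/pointcloud_1.json",
        reference_id="bad_item",
    )
except AssertionError as err:
    print(err)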

nucleus/scene.py

Lines changed: 5 additions & 9 deletions
@@ -13,7 +13,6 @@
     NUM_SENSORS_KEY,
     POINTCLOUD_LOCATION_KEY,
     REFERENCE_ID_KEY,
-    VIDEO_FRAME_LOCATION_KEY,
     VIDEO_UPLOAD_TYPE_KEY,
 )
 
@@ -487,8 +486,11 @@ def validate(self):
                 item, DatasetItem
             ), "Each item in a scene must be a DatasetItem object"
             assert (
-                item.video_frame_location is not None
-            ), "Each item in a scene must have a video_frame_location"
+                item.image_location is not None
+            ), "Each item in a video scene must have an image_location"
+            assert (
+                item.upload_to_scale is not False
+            ), "Skipping upload to Scale is not currently implemented for videos"
 
     def add_item(
         self, item: DatasetItem, index: int = None, update: bool = False
@@ -600,9 +602,3 @@ def check_all_scene_paths_remote(
                 f"All paths for DatasetItems in a Scene must be remote, but {item.image_location} is either "
                 "local, or a remote URL type that is not supported."
             )
-        video_frame_location = getattr(item, VIDEO_FRAME_LOCATION_KEY)
-        if video_frame_location and is_local_path(video_frame_location):
-            raise ValueError(
-                f"All paths for DatasetItems in a Scene must be remote, but {item.video_frame_location} is either "
-                "local, or a remote URL type that is not supported."
-            )
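The reworked validate assertions require every frame in a video scene to carry an `image_location` and to leave `upload_to_scale` at its default of True. A sketch of the constraint at the item level (placeholder URLs; both items construct fine, but the second would fail scene validation):

from nucleus import DatasetItem

# Accepted by the new checks: has an image_location and uploads to Scale.
good_frame = DatasetItem(
    image_location="s3://your-bucket/frames/frame_0.jpg",
    reference_id="frame_0",
)

# Constructs on its own, but would trip the added assertion
# "Skipping upload to Scale is not currently implemented for videos"
# once placed in a video scene and validated.
opted_out_frame = DatasetItem(
    image_location="s3://your-bucket/frames/frame_1.jpg",
    reference_id="frame_1",
    upload_to_scale=False,
)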

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.6.6"
+version = "0.6.7"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license = "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/helpers.py

Lines changed: 7 additions & 10 deletions
@@ -92,14 +92,14 @@
         "frame_rate": 15,
         "frames": [
             {
-                "video_frame_url": TEST_IMG_URLS[0],
-                "type": "video",
+                "image_url": TEST_IMG_URLS[0],
+                "type": "image",
                 "reference_id": "video_frame_0",
                 "metadata": {"time": 123, "foo": "bar"},
             },
             {
-                "video_frame_url": TEST_IMG_URLS[1],
-                "type": "video",
+                "image_url": TEST_IMG_URLS[1],
+                "type": "image",
                 "reference_id": "video_frame_1",
                 "metadata": {"time": 124, "foo": "bar_2"},
             },
@@ -124,28 +124,25 @@ def reference_id_from_url(url):
 
 TEST_VIDEO_ITEMS = [
     DatasetItem(
-        None,
+        TEST_IMG_URLS[0],
         reference_id_from_url(TEST_IMG_URLS[0]),
         None,
         None,
         True,
-        TEST_IMG_URLS[0],
     ),
     DatasetItem(
-        None,
+        TEST_IMG_URLS[1],
         reference_id_from_url(TEST_IMG_URLS[1]),
         None,
         None,
         True,
-        TEST_IMG_URLS[1],
     ),
     DatasetItem(
-        None,
+        TEST_IMG_URLS[2],
         reference_id_from_url(TEST_IMG_URLS[2]),
         None,
         None,
         True,
-        TEST_IMG_URLS[2],
     ),
 ]
 

tests/test_scene.py

Lines changed: 4 additions & 6 deletions
@@ -26,7 +26,6 @@
     TYPE_KEY,
     UPDATE_KEY,
     URL_KEY,
-    VIDEO_KEY,
     VIDEO_UPLOAD_TYPE_KEY,
 )
 from nucleus.scene import flatten
@@ -307,15 +306,15 @@ def test_video_scene_add_item():
         FRAME_RATE_KEY: frame_rate,
         FRAMES_KEY: [
             {
-                URL_KEY: TEST_VIDEO_ITEMS[2].video_frame_location,
+                URL_KEY: TEST_VIDEO_ITEMS[2].image_location,
                 REFERENCE_ID_KEY: TEST_VIDEO_ITEMS[2].reference_id,
-                TYPE_KEY: VIDEO_KEY,
+                TYPE_KEY: IMAGE_KEY,
                 METADATA_KEY: TEST_VIDEO_ITEMS[2].metadata or {},
             },
             {
-                URL_KEY: TEST_VIDEO_ITEMS[1].video_frame_location,
+                URL_KEY: TEST_VIDEO_ITEMS[1].image_location,
                 REFERENCE_ID_KEY: TEST_VIDEO_ITEMS[1].reference_id,
-                TYPE_KEY: VIDEO_KEY,
+                TYPE_KEY: IMAGE_KEY,
                 METADATA_KEY: TEST_VIDEO_ITEMS[1].metadata or {},
             },
         ],
@@ -572,7 +571,6 @@ def test_video_scene_upload_async(dataset_scene):
         VideoScene.from_json(scene_json) for scene_json in payload[SCENES_KEY]
     ]
     update = payload[UPDATE_KEY]
-
     job = dataset_scene.append(scenes, update=update, asynchronous=True)
     job.sleep_until_complete()
     status = job.status()
