
Commit 9247aa9

Merge pull request #96 from scaleapi/implement_scene_class
Implement Scene class
2 parents af14a2e + 85f69fe commit 9247aa9

11 files changed: 480 additions, 80 deletions

nucleus/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -109,7 +109,7 @@
     UPDATE_KEY,
 )
 from .dataset import Dataset
-from .dataset_item import DatasetItem
+from .dataset_item import DatasetItem, CameraParams
 from .errors import (
     DatasetItemRetrievalError,
     ModelCreationError,
@@ -135,6 +135,7 @@
 )
 from .slice import Slice
 from .upload_response import UploadResponse
+from .scene import Frame, Scene, LidarScene

 # pylint: disable=E1101
 # TODO: refactor to reduce this file to under 1000 lines.
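
With these exports in place, the scene classes and CameraParams resolve from the package root. A minimal sketch of what this hunk enables (only the names are confirmed by the diff; anything beyond the import is an assumption):

```python
# Top-level imports made possible by this change; previously only
# DatasetItem was re-exported from nucleus.dataset_item.
from nucleus import CameraParams, DatasetItem, Frame, LidarScene, Scene
```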

nucleus/annotation.py

Lines changed: 3 additions & 8 deletions
@@ -2,7 +2,7 @@
 from dataclasses import dataclass
 from enum import Enum
 from typing import Dict, List, Optional, Sequence, Union
-from nucleus.dataset_item import is_local_path
+from urllib.parse import urlparse

 from .constants import (
     ANNOTATION_ID_KEY,
@@ -310,13 +310,8 @@ def to_payload(self) -> dict:
         }


-def check_all_frame_paths_remote(frames: List[str]):
-    for frame_url in frames:
-        if is_local_path(frame_url):
-            raise ValueError(
-                f"All paths must be remote, but {frame_url} is either "
-                "local, or a remote URL type that is not supported."
-            )
+def is_local_path(path: str) -> bool:
+    return urlparse(path).scheme not in {"https", "http", "s3", "gs"}


 def check_all_mask_paths_remote(
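
The relocated helper classifies a path purely by URL scheme: anything not served over https, http, s3, or gs counts as local. A standalone sketch of that behavior (the function body is copied from the hunk above; the sample paths are illustrative):

```python
from urllib.parse import urlparse

def is_local_path(path: str) -> bool:
    # A path is "remote" only if its scheme is one of the supported ones.
    return urlparse(path).scheme not in {"https", "http", "s3", "gs"}

assert is_local_path("/tmp/scan.pcd")                   # no scheme -> local
assert is_local_path("file:///tmp/scan.pcd")            # unsupported scheme
assert not is_local_path("s3://bucket/scan.pcd")        # supported remote
assert not is_local_path("https://example.com/img.jpg") # supported remote
```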

nucleus/constants.py

Lines changed: 14 additions & 3 deletions
@@ -12,8 +12,11 @@
 ANNOTATION_UPDATE_KEY = "update"
 AUTOTAGS_KEY = "autotags"
 EXPORTED_ROWS = "exportedRows"
+CAMERA_PARAMS_KEY = "camera_params"
 CLASS_PDF_KEY = "class_pdf"
 CONFIDENCE_KEY = "confidence"
+CX_KEY = "cx"
+CY_KEY = "cy"
 DATASET_ID_KEY = "dataset_id"
 DATASET_ITEM_IDS_KEY = "dataset_item_ids"
 DATASET_ITEM_ID_KEY = "dataset_item_id"
@@ -30,16 +33,21 @@
 ERROR_CODES = "error_codes"
 ERROR_ITEMS = "upload_errors"
 ERROR_PAYLOAD = "error_payload"
-FRAMES = "frames"
+FRAMES_KEY = "frames"
+FX_KEY = "fx"
+FY_KEY = "fy"
 GEOMETRY_KEY = "geometry"
+HEADING_KEY = "heading"
 HEIGHT_KEY = "height"
 IGNORED_ITEMS = "ignored_items"
 IMAGE_KEY = "image"
+IMAGE_LOCATION_KEY = "image_location"
 IMAGE_URL_KEY = "image_url"
 INDEX_KEY = "index"
 ITEMS_KEY = "items"
 ITEM_ID_KEY = "item_id"
 ITEM_KEY = "item"
+ITEMS_KEY = "items"
 ITEM_METADATA_SCHEMA_KEY = "item_metadata_schema"
 JOB_ID_KEY = "job_id"
 KEEP_HISTORY_KEY = "keep_history"
@@ -57,13 +65,15 @@
 NEW_ITEMS = "new_items"
 NUCLEUS_ENDPOINT = "https://api.scale.com/v1/nucleus"
 ORIGINAL_IMAGE_URL_KEY = "original_image_url"
+POINTCLOUD_LOCATION_KEY = "pointcloud_location"
+POINTCLOUD_URL_KEY = "pointcloud_url"
 POSITION_KEY = "position"
 PREDICTIONS_IGNORED_KEY = "predictions_ignored"
 PREDICTIONS_PROCESSED_KEY = "predictions_processed"
 REFERENCE_IDS_KEY = "reference_ids"
 REFERENCE_ID_KEY = "reference_id"
 REQUEST_ID_KEY = "requestId"
-SCENES = "scenes"
+SCENES_KEY = "scenes"
 SEGMENTATIONS_KEY = "segmentations"
 SLICE_ID_KEY = "slice_id"
 STATUS_CODE_KEY = "status_code"
@@ -72,10 +82,11 @@
 TYPE_KEY = "type"
 UPDATED_ITEMS = "updated_items"
 UPDATE_KEY = "update"
-URL = "url"
+URL_KEY = "url"
 VERTICES_KEY = "vertices"
 WIDTH_KEY = "width"
 YAW_KEY = "yaw"
+W_KEY = "w"
 X_KEY = "x"
 Y_KEY = "y"
 Z_KEY = "z"
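
Taken together, the new keys (fx, fy, cx, cy, heading, and the w/x/y/z components) suggest a camera-parameters payload of pinhole intrinsics plus a pose. A hypothetical payload under that assumption; the exact shape is defined elsewhere in this PR, not in this file:

```python
# Illustrative values only; field names mirror the new *_KEY constants.
camera_params = {
    "fx": 1024.0,  # focal length in pixels, x axis
    "fy": 1024.0,  # focal length in pixels, y axis
    "cx": 960.0,   # principal point, x
    "cy": 540.0,   # principal point, y
    "position": {"x": 0.0, "y": 0.0, "z": 1.6},
    "heading": {"w": 1.0, "x": 0.0, "y": 0.0, "z": 0.0},  # identity quaternion
}
```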

nucleus/dataset.py

Lines changed: 43 additions & 19 deletions
@@ -13,7 +13,6 @@
 from .annotation import (
     Annotation,
     check_all_mask_paths_remote,
-    check_all_frame_paths_remote,
 )
 from .constants import (
     DATASET_ITEM_IDS_KEY,
@@ -23,22 +22,24 @@
     DATASET_SLICES_KEY,
     DEFAULT_ANNOTATION_UPDATE_MODE,
     EXPORTED_ROWS,
-    FRAMES,
     NAME_KEY,
     REFERENCE_IDS_KEY,
     REQUEST_ID_KEY,
-    SCENES,
     UPDATE_KEY,
-    URL,
 )
 from .dataset_item import (
     DatasetItem,
     check_all_paths_remote,
     check_for_duplicate_reference_ids,
 )
-from .payload_constructor import construct_model_run_creation_payload
+from .scene import LidarScene, check_all_scene_paths_remote
+from .payload_constructor import (
+    construct_append_scenes_payload,
+    construct_model_run_creation_payload,
+)

 WARN_FOR_LARGE_UPLOAD = 50000
+WARN_FOR_LARGE_SCENES_UPLOAD = 5


 class Dataset:
@@ -199,16 +200,16 @@ def ingest_tasks(self, task_ids: dict):

     def append(
         self,
-        dataset_items: List[DatasetItem],
+        items: Union[List[DatasetItem], List[LidarScene]],
         update: Optional[bool] = False,
         batch_size: Optional[int] = 20,
         asynchronous=False,
     ) -> Union[dict, AsyncJob]:
         """
-        Appends images with metadata (dataset items) to the dataset. Overwrites images on collision if forced.
+        Appends images with metadata (dataset items) or scenes to the dataset. Overwrites images on collision if forced.

         Parameters:
-        :param dataset_items: items to upload
+        :param items: items to upload
         :param update: if True overwrites images and metadata on collision
         :param batch_size: batch parameter for long uploads
         :param aynchronous: if True, return a job object representing asynchronous ingestion job.
@@ -220,6 +221,17 @@ def append(
             'ignored_items': int,
         }
         """
+        dataset_items = [
+            item for item in items if isinstance(item, DatasetItem)
+        ]
+        scenes = [item for item in items if isinstance(item, LidarScene)]
+        if dataset_items and scenes:
+            raise Exception(
+                "You must append either DatasetItems or Scenes to the dataset."
+            )
+        if scenes:
+            return self.append_scenes(scenes, update, asynchronous)
+
         check_for_duplicate_reference_ids(dataset_items)

         if len(dataset_items) > WARN_FOR_LARGE_UPLOAD and not asynchronous:
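
The new guard partitions the input by type, rejects mixed uploads, and routes scenes to append_scenes. A self-contained sketch of just that dispatch logic, using stub classes in place of the real nucleus types:

```python
from typing import List, Union

class DatasetItem: ...   # stub standing in for nucleus.DatasetItem
class LidarScene: ...    # stub standing in for nucleus.LidarScene

def dispatch(items: Union[List[DatasetItem], List[LidarScene]]) -> str:
    dataset_items = [i for i in items if isinstance(i, DatasetItem)]
    scenes = [i for i in items if isinstance(i, LidarScene)]
    if dataset_items and scenes:
        raise Exception(
            "You must append either DatasetItems or Scenes to the dataset."
        )
    # Scenes take the scene path; everything else follows the item path.
    return "append_scenes" if scenes else "append_items"

assert dispatch([DatasetItem(), DatasetItem()]) == "append_items"
assert dispatch([LidarScene()]) == "append_scenes"
```

One consequence of this shape worth noting: objects that are neither DatasetItem nor LidarScene fall out of both list comprehensions and are silently dropped rather than rejected.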
@@ -248,39 +260,51 @@ def append(
                 batch_size=batch_size,
             )

-    def upload_scenes(
+    def append_scenes(
         self,
-        payload: dict,
+        scenes: List[LidarScene],
         update: Optional[bool] = False,
-        asynchronous: bool = False,
+        asynchronous: Optional[bool] = False,
     ) -> Union[dict, AsyncJob]:
         """
-        Uploads scenes with given frames to the dataset
+        Appends scenes with given frames (containing pointclouds and optional images) to the dataset

         Parameters:
-        :param payload: dictionary containing scenes to be uploaded
+        :param scenes: scenes to upload
         :param update: if True, overwrite scene on collision
-        :param aynchronous: if True, return a job object representing asynchronous ingestion job
+        :param asynchronous: if True, return a job object representing asynchronous ingestion job
         :return:
         {
             'dataset_id': str,
             'new_scenes': int,
+            'ignored_scenes': int,
+            'scenes_errored': int,
+            'errors': List[str],
         }
         """
+        for scene in scenes:
+            scene.validate()
+
+        if len(scenes) > WARN_FOR_LARGE_SCENES_UPLOAD and not asynchronous:
+            print(
+                "Tip: for large uploads, get faster performance by importing your data "
+                "into Nucleus directly from a cloud storage provider. See "
+                "https://dashboard.scale.com/nucleus/docs/api?language=python#guide-for-large-ingestions"
+                " for details."
+            )
+
         if asynchronous:
-            for scene in payload[SCENES]:
-                for frame in scene[FRAMES]:
-                    check_all_frame_paths_remote(frame[URL])
+            check_all_scene_paths_remote(scenes)
             request_id = serialize_and_write_to_presigned_url(
-                [payload], self.id, self._client
+                scenes, self.id, self._client
             )
             response = self._client.make_request(
                 payload={REQUEST_ID_KEY: request_id, UPDATE_KEY: update},
                 route=f"{self.id}/upload_scenes?async=1",
             )
             return AsyncJob.from_json(response, self._client)

-        # TODO: create client method for sync scene upload
+        payload = construct_append_scenes_payload(scenes, update)
         response = self._client.make_request(
             payload=payload,
             route=f"{self.id}/upload_scenes",
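
For callers, the rename from upload_scenes to append_scenes also changes the contract from a raw payload dict to typed LidarScene objects. A hedged usage sketch (the API key, dataset id, and scene construction are placeholders, not taken from this diff):

```python
from typing import List

import nucleus
from nucleus import LidarScene

client = nucleus.NucleusClient("YOUR_SCALE_API_KEY")  # placeholder key
dataset = client.get_dataset("YOUR_DATASET_ID")       # placeholder id

scenes: List[LidarScene] = []  # build LidarScene objects here (not shown in this diff)

# Every scene is validated first; with asynchronous=True all scene paths
# must be remote and an AsyncJob is returned instead of the response dict.
job = dataset.append_scenes(scenes, update=False, asynchronous=True)
```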
