Skip to content

feat: allow uploading manually timestamped videos #156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion nominal/core/_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,19 @@ def put_multipart_upload(
All metadata-style requests (init, sign, complete) proxy through Nominal servers, while the upload PUT requests for
each part go to a pre-signed URL to the storage provider.

Ref: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
Args:
auth_header: Nominal authorization token
f: Binary IO to upload
filename: URL-safe filename to use when uploading to S3
mimetype: Type of data contained within binary stream
upload_client: Conjure upload client
chunk_size: Maximum size of chunk to upload to S3 at once
max_workers: Number of worker threads to use when processing and uploading data

Returns: Path to the uploaded object in S3

See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html

"""
    # multithreaded multipart upload:
# - create a worker thread pool and a queue for all threads to share
Expand Down Expand Up @@ -133,6 +145,23 @@ def upload_multipart_io(
chunk_size: int = DEFAULT_CHUNK_SIZE,
max_workers: int = DEFAULT_NUM_WORKERS,
) -> str:
    """Execute a multipart upload to S3 proxied via Nominal servers.

Args:
auth_header: Nominal authorization token
f: Binary IO to upload
name: Name of the file to create in S3
NOTE: does not need to be URL Safe
file_type: Type of data being uploaded
upload_client: Conjure upload client
chunk_size: Maximum size of chunk to upload to S3 at once
max_workers: Number of worker threads to use when processing and uploading data

Returns: Path to the uploaded object in S3

Note: see put_multipart_upload for more details

"""
urlsafe_name = urllib.parse.quote_plus(name)
safe_filename = f"{urlsafe_name}{file_type.extension}"
return put_multipart_upload(
Expand All @@ -154,6 +183,21 @@ def upload_multipart_file(
chunk_size: int = DEFAULT_CHUNK_SIZE,
max_workers: int = DEFAULT_NUM_WORKERS,
) -> str:
"""Execute a multipart upload to S3 proxied via Nominal servers.

Args:
auth_header: Nominal authorization token
file: File to upload to S3
upload_client: Conjure upload client
file_type: Manually override inferred file type for the given file
chunk_size: Maximum size of chunk to upload to S3 at once
max_workers: Number of worker threads to use when processing and uploading data

Returns: Path to the uploaded object in S3

Note: see put_multipart_upload for more details

"""
if file_type is None:
file_type = FileType.from_path(file)

Expand Down
32 changes: 28 additions & 4 deletions nominal/core/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,16 +318,40 @@ def create_video_from_io(
video: BinaryIO,
name: str,
start: datetime | IntegralNanosecondsUTC | None = None,
frame_timestamps: Sequence[IntegralNanosecondsUTC] | None = None,
description: str | None = None,
file_type: tuple[str, str] | FileType = FileTypes.MP4,
*,
labels: Sequence[str] = (),
properties: Mapping[str, str] | None = None,
frame_timestamps: Sequence[IntegralNanosecondsUTC] | None = None,
) -> Video:
"""Create a video from a file-like object.

The video must be a file-like object in binary mode, e.g. open(path, "rb") or io.BytesIO.

Args:
----
video: file-like object to read video data from
name: Name of the video to create in Nominal
start: Starting timestamp of the video
frame_timestamps: Per-frame timestamps (in nanoseconds since unix epoch) for every frame of the video
            description: Description of the video to create in Nominal
file_type: Type of data being uploaded
            labels: Labels to apply to the video in Nominal
            properties: Properties to apply to the video in Nominal

Returns:
-------
Handle to the created video

Note:
----
Exactly one of 'start' and 'frame_timestamps' **must** be provided. Most users will
want to provide a starting timestamp: frame_timestamps is primarily useful when the scale
of the video data is not 1:1 with the playback speed or non-uniform over the course of the video,
            for example, 200 fps video artificially slowed to 30 fps without dropping frames. This will result
in the playhead on charts within the product playing at the rate of the underlying data rather than
time elapsed in the video playback.

"""
if (start is None and frame_timestamps is None) and (None not in (start, frame_timestamps)):
raise ValueError("One of 'start' or 'frame_timestamps' must be provided")
Expand Down Expand Up @@ -546,7 +570,7 @@ def get_unit(self, unit_symbol: str) -> Unit | None:
NOTE: This currently requires that units are formatted as laid out in
the latest UCUM standards (see https://ucum.org/ucum)

Returns
Returns:
-------
Rendered Unit metadata if the symbol is valid and supported by Nominal, or None
if no such unit symbol matches.
Expand Down Expand Up @@ -582,7 +606,7 @@ def set_channel_units(self, rids_to_types: Mapping[str, str | None]) -> Sequence
rids_to_types: Mapping of channel RIDs -> unit symbols (e.g. 'm/s').
NOTE: Providing `None` as the unit symbol clears any existing units for the channels.

Returns
Returns:
-------
A sequence of metadata for all updated channels
Raises:
Expand Down
4 changes: 2 additions & 2 deletions nominal/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def poll_until_ingestion_completed(self, interval: timedelta = timedelta(seconds
"""Block until dataset ingestion has completed.
This method polls Nominal for ingest status after uploading a dataset on an interval.

Raises
Raises:
------
NominalIngestFailed: if the ingest failed
NominalIngestError: if the ingest status is not known
Expand Down Expand Up @@ -395,7 +395,7 @@ def poll_until_ingestion_completed(datasets: Iterable[Dataset], interval: timede
This method polls Nominal for ingest status on each of the datasets on an interval.
No specific ordering is guaranteed, but all datasets will be checked at least once.

Raises
Raises:
------
NominalIngestMultiError: if any of the datasets failed to ingest

Expand Down
2 changes: 1 addition & 1 deletion nominal/core/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def poll_until_ingestion_completed(self, interval: timedelta = timedelta(seconds
"""Block until video ingestion has completed.
This method polls Nominal for ingest status after uploading a video on an interval.

Raises
Raises:
------
NominalIngestFailed: if the ingest failed
NominalIngestError: if the ingest status is not known
Expand Down
2 changes: 1 addition & 1 deletion nominal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class NominalIngestError(NominalError):
class NominalIngestMultiError(NominalError):
"""Error(s) occurred during ingest.

Attributes
Attributes:
----------
errors: A mapping of dataset RIDs to the errors that occurred during ingest.

Expand Down
6 changes: 5 additions & 1 deletion nominal/nominal.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,11 @@ def upload_video(
file_type = FileType.from_path(path)
with open(file, "rb") as f:
return conn.create_video_from_io(
f, name, ts._SecondsNanos.from_flexible(start).to_nanoseconds(), description, file_type
f,
name,
start=ts._SecondsNanos.from_flexible(start).to_nanoseconds(),
description=description,
file_type=file_type,
)


Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ line-length = 120
exclude = ["nominal/_api/*"]
include = ["nominal/**/*.py", "tests/**/*.py"]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.ruff.lint.pylint]
max-args = 10

Expand Down
Loading