Skip to content

feat: cleanup & simplify timestamp handling #54

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 54 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
ab36c87
add time domain types
alkasm Sep 20, 2024
6f41906
Merge branch 'main' into alkasm/the-good-times
alkasm Sep 20, 2024
fca2efd
add time domain impl
alkasm Sep 23, 2024
d62bd8e
fix relative offset handling
alkasm Sep 23, 2024
d247c97
fix offset -> start, and fix key misnaming
alkasm Sep 23, 2024
c77f85b
reorganize time utils, fix timestamp handling
alkasm Sep 23, 2024
4082588
remove extraneous ellipses
alkasm Sep 23, 2024
ff86534
fmt fix
alkasm Sep 23, 2024
588ad32
remove old types
alkasm Sep 23, 2024
c71d8cf
fixed warning example
alkasm Sep 24, 2024
7e808a2
move part of warning into code comment
alkasm Sep 24, 2024
8fb1d86
remove unused type
alkasm Sep 24, 2024
a4e8daa
python 3.9 support
alkasm Sep 24, 2024
3f2d7e5
warning format
alkasm Sep 24, 2024
f1fcab2
update tests
alkasm Sep 24, 2024
b70207e
move iso8601 formatting to utils
alkasm Sep 24, 2024
b2e8afa
more explicitly named function
alkasm Sep 24, 2024
16852d2
rename timedomain -> ts
alkasm Sep 24, 2024
adc07f8
move integral nanoseconds to ts
alkasm Sep 24, 2024
f70135e
fix
alkasm Sep 24, 2024
2499d0c
deal w/ circular import
alkasm Sep 24, 2024
1369fe1
created an abc for time domains
alkasm Sep 24, 2024
1eaf9dd
add ts to toplevel and reorder all
alkasm Sep 24, 2024
14ac3e0
rename for similarity
alkasm Sep 24, 2024
3c38e28
all timestamp conversions through an intermediary type
alkasm Sep 24, 2024
6ec4586
move SecondsNanos to new ts file, make internal
alkasm Sep 24, 2024
8507888
remove timeutils
alkasm Sep 24, 2024
c1a92a6
update e2e tests
alkasm Sep 24, 2024
82ab291
unify on time_domain name
alkasm Sep 24, 2024
2c8df1e
documentation once-over on ts.py
alkasm Sep 24, 2024
fd1a721
correct comment about offset
alkasm Sep 24, 2024
a12b0f4
.to_integral_nanoseconds() -> .to_ns()
alkasm Sep 24, 2024
30cb8e6
ns felt too short
alkasm Sep 24, 2024
b4667d6
typing extensions alias, self
alkasm Sep 24, 2024
5eefad2
check types for all python versions
alkasm Sep 24, 2024
de8ce09
domain -> type
alkasm Sep 24, 2024
ee1b368
expand core to string types
alkasm Sep 24, 2024
d16efbe
fix
alkasm Sep 24, 2024
32358b1
Merge branch 'main' into alkasm/the-good-times
alkasm Sep 25, 2024
d98f573
missed a save
alkasm Sep 25, 2024
b6db027
add new timestamp type
alkasm Sep 25, 2024
ab6a830
fix e2e test
alkasm Sep 25, 2024
3bcb63e
export LogTimestampType from ts
alkasm Sep 25, 2024
599878f
add unit tests for time conversions
alkasm Sep 25, 2024
b0c48d2
cleanups
alkasm Sep 25, 2024
f60ebff
Merge branch 'main' into alkasm/the-good-times
alkasm Sep 30, 2024
0fa2d61
fix e2e tests
alkasm Sep 30, 2024
4ad240a
isoformat strings with Z only supported in python 3.11+
alkasm Sep 30, 2024
e5586c8
only compare seconds of datetime obj
alkasm Sep 30, 2024
534e901
add check-types-all
alkasm Sep 30, 2024
0f23c7d
remove extra utils func
alkasm Sep 30, 2024
1b104f3
add javadoc format reference.
alkasm Sep 30, 2024
656ea44
add docs and many e2e tests
alkasm Sep 30, 2024
a07040c
clean up ts docs
alkasm Sep 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion justfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ test-e2e token:

# check static typing
check-types:
poetry run mypy
poetry run mypy --python-version 3.12
poetry run mypy --python-version 3.11
poetry run mypy --python-version 3.10
poetry run mypy --python-version 3.9

# check code formatting | fix with `just fix-format`
check-format:
Expand Down
24 changes: 13 additions & 11 deletions nominal/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import ts
from .core import Attachment, Dataset, NominalClient, Run, Video
from .nominal import (
create_run,
Expand All @@ -18,25 +19,26 @@
)

__all__ = [
"set_base_url",
"get_default_client",
"upload_pandas",
"upload_polars",
"upload_csv",
"get_dataset",
"ts",
"create_run",
"create_run_csv",
"download_attachment",
"get_attachment",
"get_dataset",
"get_default_client",
"get_run",
"get_video",
"search_runs",
"set_base_url",
"upload_attachment",
"get_attachment",
"download_attachment",
"upload_csv",
"upload_pandas",
"upload_polars",
"upload_video",
"get_video",
# classes: when adding a new class, also add a filter to "hide" it in docs/reference/toplevel.md
"Dataset",
"Run",
"Attachment",
"Dataset",
"NominalClient",
"Run",
"Video",
]
107 changes: 1 addition & 106 deletions nominal/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,119 +4,14 @@
import mimetypes
import os
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import BinaryIO, Iterable, Iterator, Literal, NamedTuple, Type, TypeVar, Union

import dateutil.parser
from typing_extensions import TypeAlias # typing.TypeAlias in 3.10+

from ._api.combined import ingest_api, scout_run_api
from typing import BinaryIO, Iterable, Iterator, NamedTuple, TypeVar

logger = logging.getLogger(__name__)

IntegralNanosecondsUTC = int
T = TypeVar("T")


@dataclass
class CustomTimestampFormat:
    """A user-supplied format for absolute timestamps that are not ISO 8601.

    Both fields are forwarded unchanged to the ingest API's custom-timestamp
    description when the timestamp type is converted for upload.
    """

    # Format pattern string, passed through as-is.
    format: str
    # Forwarded as `default_year`; presumably the year to assume when the
    # format itself carries none -- TODO confirm against the ingest API.
    default_year: int = 0


# Accepted values for a timestamp column's type: either one of the literal
# strings below ("iso_8601", "epoch_<unit>" or "relative_<unit>") or a
# CustomTimestampFormat instance describing a custom absolute format.
#
# Using Union rather than the "|" operator due to https://github.com/python/mypy/issues/11665.
TimestampColumnType: TypeAlias = Union[
Literal[
"iso_8601",
"epoch_days",
"epoch_hours",
"epoch_minutes",
"epoch_seconds",
"epoch_milliseconds",
"epoch_microseconds",
"epoch_nanoseconds",
"relative_days",
"relative_hours",
"relative_minutes",
"relative_seconds",
"relative_milliseconds",
"relative_microseconds",
"relative_nanoseconds",
],
CustomTimestampFormat,
]


def _timestamp_type_to_conjure_ingest_api(
    ts_type: TimestampColumnType,
) -> ingest_api.TimestampType:
    """Convert a client-side timestamp column type to its conjure ingest-API form.

    Args:
        ts_type: a `CustomTimestampFormat`, the literal "iso_8601", or a
            "<relation>_<unit>" string whose relation is "epoch" or "relative".

    Returns:
        The corresponding `ingest_api.TimestampType`: absolute for custom,
        ISO 8601 and epoch types, relative for "relative_<unit>" types.

    Raises:
        ValueError: if `ts_type` is a string that does not name a known
            timestamp type (unknown relation, unknown unit, or no "_").
    """
    if isinstance(ts_type, CustomTimestampFormat):
        custom = ingest_api.CustomTimestamp(format=ts_type.format, default_year=ts_type.default_year)
        return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(custom_format=custom))
    if ts_type == "iso_8601":
        return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(iso8601=ingest_api.Iso8601Timestamp()))

    # partition() never fails on a missing separator, so malformed strings fall
    # through to the single ValueError below instead of an unpacking error.
    relation, _, unit = ts_type.partition("_")
    if relation in ("epoch", "relative"):
        try:
            time_unit = ingest_api.TimeUnit[unit.upper()]
        except KeyError:
            # An unknown unit used to leak out as a bare KeyError; report it
            # the same way as every other invalid timestamp type.
            raise ValueError(f"invalid timestamp type: {ts_type}") from None
        if relation == "epoch":
            epoch = ingest_api.EpochTimestamp(time_unit=time_unit)
            return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(epoch_of_time_unit=epoch))
        return ingest_api.TimestampType(relative=ingest_api.RelativeTimestamp(time_unit=time_unit))
    raise ValueError(f"invalid timestamp type: {ts_type}")


def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp:
    """Build a `scout_run_api.UtcTimestamp` from a datetime or integer nanoseconds.

    The input is first split into whole seconds and leftover nanoseconds,
    which populate `seconds_since_epoch` and `offset_nanoseconds`.
    """
    whole_seconds, leftover_nanos = _flexible_time_to_seconds_nanos(timestamp)
    return scout_run_api.UtcTimestamp(
        seconds_since_epoch=whole_seconds,
        offset_nanoseconds=leftover_nanos,
    )


def _flexible_time_to_conjure_ingest_api(
    timestamp: datetime | IntegralNanosecondsUTC,
) -> ingest_api.UtcTimestamp:
    """Build an `ingest_api.UtcTimestamp` from a datetime or integer nanoseconds.

    The input is first split into whole seconds and leftover nanoseconds,
    which populate `seconds_since_epoch` and `offset_nanoseconds`.
    """
    whole_seconds, leftover_nanos = _flexible_time_to_seconds_nanos(timestamp)
    return ingest_api.UtcTimestamp(
        seconds_since_epoch=whole_seconds,
        offset_nanoseconds=leftover_nanos,
    )


def _flexible_time_to_seconds_nanos(
    timestamp: datetime | IntegralNanosecondsUTC,
) -> tuple[int, int]:
    """Split a datetime or integer-nanosecond timestamp into (seconds, nanoseconds).

    Raises:
        TypeError: if `timestamp` is neither a datetime nor an integer.
    """
    if isinstance(timestamp, datetime):
        return _datetime_to_seconds_nanos(timestamp)
    if not isinstance(timestamp, IntegralNanosecondsUTC):
        raise TypeError(f"expected {datetime} or {IntegralNanosecondsUTC}, got {type(timestamp)}")
    # divmod floors, so the nanosecond remainder is never negative.
    return divmod(timestamp, 1_000_000_000)


def _conjure_time_to_integral_nanoseconds(ts: scout_run_api.UtcTimestamp) -> IntegralNanosecondsUTC:
    """Collapse a conjure UTC timestamp into a single integer nanosecond count.

    A missing (falsy) `offset_nanoseconds` is treated as zero.
    """
    whole_second_nanos = ts.seconds_since_epoch * 1_000_000_000
    return whole_second_nanos + (ts.offset_nanoseconds or 0)


def _datetime_to_seconds_nanos(dt: datetime) -> tuple[int, int]:
dt = dt.astimezone(timezone.utc)
seconds = int(dt.timestamp())
nanos = dt.microsecond * 1000
return seconds, nanos


def _datetime_to_integral_nanoseconds(dt: datetime) -> IntegralNanosecondsUTC:
    """Convert a datetime to a single integer count of nanoseconds.

    Delegates the split into seconds and nanoseconds to
    `_datetime_to_seconds_nanos`, then recombines the two parts.
    """
    whole_seconds, sub_second_nanos = _datetime_to_seconds_nanos(dt)
    return sub_second_nanos + whole_seconds * 1_000_000_000


def _parse_timestamp(ts: str | datetime | IntegralNanosecondsUTC) -> IntegralNanosecondsUTC:
    """Normalise a string, datetime, or integer timestamp to integer nanoseconds.

    Integers are returned unchanged; strings are parsed with
    `dateutil.parser.parse`; the resulting (or given) datetime is then
    converted with `_datetime_to_integral_nanoseconds`.
    """
    if isinstance(ts, int):
        return ts
    as_datetime = dateutil.parser.parse(ts) if isinstance(ts, str) else ts
    return _datetime_to_integral_nanoseconds(as_datetime)


def construct_user_agent_string() -> str:
"""Constructs a user-agent string with system & Python metadata.
E.g.: nominal-python/1.0.0b0 (macOS-14.4-arm64-arm-64bit) cpython/3.12.4
Expand Down
12 changes: 2 additions & 10 deletions nominal/cli/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import click

from ..nominal import _upload_csv
from ..ts import _LiteralAbsolute
from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client


Expand Down Expand Up @@ -43,16 +44,7 @@ def upload_csv(
name: str,
file: str,
timestamp_column: str,
timestamp_type: Literal[
"iso_8601",
"epoch_days",
"epoch_hours",
"epoch_minutes",
"epoch_seconds",
"epoch_milliseconds",
"epoch_microseconds",
"epoch_nanoseconds",
],
timestamp_type: _LiteralAbsolute,
desc: str | None,
wait: bool,
base_url: str,
Expand Down
6 changes: 3 additions & 3 deletions nominal/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import click

from .._utils import _parse_timestamp
from ..ts import _SecondsNanos
from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client


Expand Down Expand Up @@ -36,8 +36,8 @@ def create(
client = get_client(base_url, token)
run = client.create_run(
name,
_parse_timestamp(start),
_parse_timestamp(end),
_SecondsNanos.from_flexible(start).to_nanoseconds(),
_SecondsNanos.from_flexible(end).to_nanoseconds(),
desc,
properties=dict(properties),
labels=labels,
Expand Down
49 changes: 15 additions & 34 deletions nominal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,16 @@
upload_api,
)
from ._multipart import put_multipart_upload
from ._utils import (
CustomTimestampFormat,
FileType,
FileTypes,
IntegralNanosecondsUTC,
TimestampColumnType,
_conjure_time_to_integral_nanoseconds,
_flexible_time_to_conjure_ingest_api,
_flexible_time_to_conjure_scout_run_api,
_timestamp_type_to_conjure_ingest_api,
construct_user_agent_string,
update_dataclass,
)
from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass
from .exceptions import NominalIngestError, NominalIngestFailed
from .ts import IntegralNanosecondsUTC, _AnyTimestampType, _SecondsNanos, _to_typed_timestamp_type

__all__ = [
"NominalClient",
"Run",
"Dataset",
"Attachment",
"Video",
"IntegralNanosecondsUTC",
"CustomTimestampFormat",
]


Expand Down Expand Up @@ -172,8 +159,8 @@ def _from_conjure(cls, nominal_client: NominalClient, run: scout_run_api.Run) ->
description=run.description,
properties=MappingProxyType(run.properties),
labels=tuple(run.labels),
start=_conjure_time_to_integral_nanoseconds(run.start_time),
end=(_conjure_time_to_integral_nanoseconds(run.end_time) if run.end_time else None),
start=_SecondsNanos.from_scout_run_api(run.start_time).to_nanoseconds(),
end=(_SecondsNanos.from_scout_run_api(run.end_time).to_nanoseconds() if run.end_time else None),
_client=nominal_client,
)

Expand Down Expand Up @@ -250,7 +237,7 @@ def update(
update_dataclass(self, dataset, fields=self.__dataclass_fields__)
return self

def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: TimestampColumnType) -> None:
def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: _AnyTimestampType) -> None:
"""Append to a dataset from a csv on-disk."""
path, file_type = _verify_csv_path(path)
with open(path, "rb") as csv_file:
Expand All @@ -260,20 +247,14 @@ def add_to_dataset_from_io(
self,
dataset: BinaryIO,
timestamp_column: str,
timestamp_type: TimestampColumnType,
timestamp_type: _AnyTimestampType,
file_type: tuple[str, str] | FileType = FileTypes.CSV,
) -> None:
"""Append to a dataset from a file-like object.

file_type: a (extension, mimetype) pair describing the type of file.
"""

if not isinstance(timestamp_type, CustomTimestampFormat):
if timestamp_type.startswith("relative"):
raise ValueError(
"multifile datasets with relative timestamps are not yet supported by the client library"
)

if isinstance(dataset, TextIOBase):
raise TypeError(f"dataset {dataset!r} must be open in binary mode, rather than text mode")

Expand All @@ -293,7 +274,7 @@ def add_to_dataset_from_io(
source_metadata=ingest_api.IngestSourceMetadata(
timestamp_metadata=ingest_api.TimestampMetadata(
series_name=timestamp_column,
timestamp_type=_timestamp_type_to_conjure_ingest_api(timestamp_type),
timestamp_type=_to_typed_timestamp_type(timestamp_type)._to_conjure_ingest_api(),
),
),
)
Expand Down Expand Up @@ -527,9 +508,9 @@ def create_run(
labels=list(labels),
links=[],
properties={} if properties is None else dict(properties),
start_time=_flexible_time_to_conjure_scout_run_api(start),
start_time=_SecondsNanos.from_flexible(start).to_scout_run_api(),
title=name,
end_time=_flexible_time_to_conjure_scout_run_api(end),
end_time=_SecondsNanos.from_flexible(end).to_scout_run_api(),
)
response = self._run_client.create_run(self._auth_header, request)
return Run._from_conjure(self, response)
Expand Down Expand Up @@ -592,7 +573,7 @@ def create_csv_dataset(
path: Path | str,
name: str | None,
timestamp_column: str,
timestamp_type: TimestampColumnType,
timestamp_type: _AnyTimestampType,
description: str | None = None,
*,
labels: Sequence[str] = (),
Expand Down Expand Up @@ -624,7 +605,7 @@ def create_dataset_from_io(
dataset: BinaryIO,
name: str,
timestamp_column: str,
timestamp_type: TimestampColumnType,
timestamp_type: _AnyTimestampType,
file_type: tuple[str, str] | FileType = FileTypes.CSV,
description: str | None = None,
*,
Expand Down Expand Up @@ -664,7 +645,7 @@ def create_dataset_from_io(
source_metadata=ingest_api.IngestSourceMetadata(
timestamp_metadata=ingest_api.TimestampMetadata(
series_name=timestamp_column,
timestamp_type=_timestamp_type_to_conjure_ingest_api(timestamp_type),
timestamp_type=_to_typed_timestamp_type(timestamp_type)._to_conjure_ingest_api(),
),
),
)
Expand Down Expand Up @@ -700,7 +681,7 @@ def create_video_from_io(
sources=[ingest_api.IngestSource(s3=ingest_api.S3IngestSource(path=s3_path))],
timestamps=ingest_api.VideoTimestampManifest(
no_manifest=ingest_api.NoTimestampManifest(
starting_timestamp=_flexible_time_to_conjure_ingest_api(start)
starting_timestamp=_SecondsNanos.from_flexible(start).to_ingest_api()
)
),
description=description,
Expand Down Expand Up @@ -839,10 +820,10 @@ def _create_search_runs_query(
) -> scout_run_api.SearchQuery:
queries = []
if start is not None:
q = scout_run_api.SearchQuery(start_time_inclusive=_flexible_time_to_conjure_scout_run_api(start))
q = scout_run_api.SearchQuery(start_time_inclusive=_SecondsNanos.from_flexible(start).to_scout_run_api())
queries.append(q)
if end is not None:
q = scout_run_api.SearchQuery(end_time_inclusive=_flexible_time_to_conjure_scout_run_api(end))
q = scout_run_api.SearchQuery(end_time_inclusive=_SecondsNanos.from_flexible(end).to_scout_run_api())
queries.append(q)
if exact_name is not None:
q = scout_run_api.SearchQuery(exact_match=exact_name)
Expand Down
Loading
Loading