From ab36c87c81cde1ca67ed9e10ef8e31c99e47fb27 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Fri, 20 Sep 2024 17:18:51 -0400 Subject: [PATCH 01/51] add time domain types --- nominal/nominal.py | 34 +++--- nominal/timedomain.py | 235 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 250 insertions(+), 19 deletions(-) create mode 100644 nominal/timedomain.py diff --git a/nominal/nominal.py b/nominal/nominal.py index 16f09ee5..deab2e88 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -13,11 +13,11 @@ FileType, FileTypes, IntegralNanosecondsUTC, - TimestampColumnType, _parse_timestamp, reader_writer, ) from .core import Attachment, Dataset, NominalClient, Run, Video +from . import timedomain if TYPE_CHECKING: import pandas as pd @@ -58,13 +58,14 @@ def upload_pandas( df: pd.DataFrame, name: str, timestamp_column: str, - timestamp_type: TimestampColumnType, + timestamp_type: timedomain._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: """Create a dataset in the Nominal platform from a pandas.DataFrame.""" conn = get_default_client() + time_domain = timedomain._make_typed_time_domain(timestamp_type) # TODO(alkasm): use parquet instead of CSV as an intermediary @@ -80,7 +81,7 @@ def write_and_close(df: pd.DataFrame, w: BinaryIO) -> None: reader, name, timestamp_column=timestamp_column, - timestamp_type=timestamp_type, + timestamp_type=time_domain, file_type=FileTypes.CSV, description=description, ) @@ -94,13 +95,14 @@ def upload_polars( df: pl.DataFrame, name: str, timestamp_column: str, - timestamp_type: TimestampColumnType, + timestamp_type: timedomain._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: """Create a dataset in the Nominal platform from a polars.DataFrame.""" conn = get_default_client() + time_domain = timedomain._make_typed_time_domain(timestamp_type) def write_and_close(df: pl.DataFrame, w: BinaryIO) -> None: df.write_csv(w) @@ 
-114,7 +116,7 @@ def write_and_close(df: pl.DataFrame, w: BinaryIO) -> None: reader, name, timestamp_column=timestamp_column, - timestamp_type=timestamp_type, + timestamp_type=time_domain, file_type=FileTypes.CSV, description=description, ) @@ -128,7 +130,7 @@ def upload_csv( file: Path | str, name: str | None, timestamp_column: str, - timestamp_type: TimestampColumnType, + timestamp_type: timedomain._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, @@ -148,16 +150,17 @@ def _upload_csv( file: Path | str, name: str | None, timestamp_column: str, - timestamp_type: TimestampColumnType, + timestamp_type: timedomain._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: + time_domain = timedomain._make_typed_time_domain(timestamp_type) dataset = conn.create_csv_dataset( file, name, timestamp_column=timestamp_column, - timestamp_type=timestamp_type, + timestamp_type=time_domain, description=description, ) if wait_until_complete: @@ -194,16 +197,7 @@ def create_run_csv( file: Path | str, name: str, timestamp_column: str, - timestamp_type: Literal[ - "iso_8601", - "epoch_days", - "epoch_hours", - "epoch_minutes", - "epoch_seconds", - "epoch_milliseconds", - "epoch_microseconds", - "epoch_nanoseconds", - ], + timestamp_type: timedomain._LiteralAbsolute | timedomain.Iso8601 | timedomain.Epoch, description: str | None = None, ) -> Run: """Create a dataset from a CSV file, and create a run based on it. @@ -217,6 +211,8 @@ def create_run_csv( The run start and end times are created from the minimum and maximum timestamps in the CSV file in the timestamp column. """ + ... 
+ raise RuntimeError("fix this") try: start, end = _get_start_end_timestamp_csv_file(file, timestamp_column, timestamp_type) except ValueError as e: @@ -307,7 +303,7 @@ def get_video(rid: str) -> Video: def _get_start_end_timestamp_csv_file( - file: Path | str, timestamp_column: str, timestamp_type: TimestampColumnType + file: Path | str, timestamp_column: str, timestamp_type: timedomain._AnyTimeDomain ) -> tuple[IntegralNanosecondsUTC, IntegralNanosecondsUTC]: import pandas as pd diff --git a/nominal/timedomain.py b/nominal/timedomain.py new file mode 100644 index 00000000..88968472 --- /dev/null +++ b/nominal/timedomain.py @@ -0,0 +1,235 @@ +""" +# _AnyTimeDomain values + +# _LiteralTimeDomain +"iso_8601" +"epoch_nanoseconds" +"epoch_microseconds" +"epoch_milliseconds" +"epoch_seconds" +"epoch_minutes" +"epoch_hours" +"relative_nanoseconds" # <-- should we allow implicit offset=None? +"relative_microseconds" # <-- should we allow implicit offset=None? +"relative_milliseconds" # <-- should we allow implicit offset=None? +"relative_seconds" # <-- should we allow implicit offset=None? +"relative_minutes" # <-- should we allow implicit offset=None? +"relative_hours" # <-- should we allow implicit offset=None? + +# TypedTimeDomain constants - are these useful? +ISO_8601 +EPOCH_NANOSECONDS +EPOCH_MICROSECONDS +EPOCH_MILLISECONDS +EPOCH_SECONDS +EPOCH_MINUTES +EPOCH_HOURS +RELATIVE_NANOSECONDS # <-- should we allow implicit offset=None? +RELATIVE_MICROSECONDS # <-- should we allow implicit offset=None? +RELATIVE_MILLISECONDS # <-- should we allow implicit offset=None? +RELATIVE_SECONDS # <-- should we allow implicit offset=None? +RELATIVE_MINUTES # <-- should we allow implicit offset=None? +RELATIVE_HOURS # <-- should we allow implicit offset=None? + +# TypedTimeDomain +Epoch("nanoseconds") +Epoch("microseconds") +Epoch("milliseconds") +Epoch("seconds") +Epoch("minutes") +Epoch("hours") +Relative("nanoseconds") # <-- should we allow implicit offset=None? 
+Relative("microseconds") # <-- should we allow implicit offset=None? +Relative("milliseconds") # <-- should we allow implicit offset=None? +Relative("seconds") # <-- should we allow implicit offset=None? +Relative("minutes") # <-- should we allow implicit offset=None? +Relative("hours") # <-- should we allow implicit offset=None? +Relative("nanoseconds", offset=15) +Relative("microseconds", offset=15) +Relative("milliseconds", offset=15) +Relative("seconds", offset=15) +Relative("minutes", offset=15) +Relative("hours", offset=15) +Custom(r"yyyy-MM-dd[T]hh:mm:ss") +Custom(r"MM-dd[T]hh:mm:ss", default_year=2024) +""" + +from __future__ import annotations +from dataclasses import dataclass +from types import MappingProxyType +from typing import Literal, Mapping +from typing_extensions import TypeAlias +from nominal._api.combined import ingest_api + +IntegralNanosecondsUTC: TypeAlias = int + + +@dataclass(frozen=True) +class Iso8601: + pass + + +@dataclass(frozen=True) +class Epoch: + unit: _LiteralTimeUnit + + +@dataclass(frozen=True) +class Relative: + unit: _LiteralTimeUnit + offset: int | None = None + """Offset from the beginning of a data collection. The offset must be in the same units as the timestamp type itself.""" + # TODO(alkasm): is ^ true or is it just nanoseconds? + # TODO(alkasm): may be okay with offset=0, but may need to detect presence? + # the backend allows for None on the first upload, but may error on the second one + # but if we default to 0, it may always work and overwrite - so None helps prevent? 
+ # or we can just disallow not specifying the offset + + +@dataclass(frozen=True) +class Custom: + format: str + default_year: int | None = None + + +ISO_8601 = Iso8601() +EPOCH_NANOSECONDS = Epoch("nanoseconds") +EPOCH_MICROSECONDS = Epoch("microseconds") +EPOCH_MILLISECONDS = Epoch("milliseconds") +EPOCH_SECONDS = Epoch("seconds") +EPOCH_MINUTES = Epoch("minutes") +EPOCH_HOURS = Epoch("hours") +RELATIVE_NANOSECONDS = Relative("nanoseconds") +RELATIVE_MICROSECONDS = Relative("microseconds") +RELATIVE_MILLISECONDS = Relative("milliseconds") +RELATIVE_SECONDS = Relative("seconds") +RELATIVE_MINUTES = Relative("minutes") +RELATIVE_HOURS = Relative("hours") + +_LiteralTimeUnit: TypeAlias = Literal[ + "nanoseconds", + "microseconds", + "milliseconds", + "seconds", + "minutes", + "hours", +] + +_LiteralAbsolute: TypeAlias = Literal[ + "iso_8601", + "epoch_nanoseconds", + "epoch_microseconds", + "epoch_milliseconds", + "epoch_seconds", + "epoch_minutes", + "epoch_hours", +] + +_LiteralRelative: TypeAlias = Literal[ + "relative_nanoseconds", + "relative_microseconds", + "relative_milliseconds", + "relative_seconds", + "relative_minutes", + "relative_hours", +] + + +TypedTimeDomain: TypeAlias = Iso8601 | Epoch | Relative | Custom +_LiteralTimeDomain: TypeAlias = _LiteralAbsolute | _LiteralRelative +_AnyTimeDomain: TypeAlias = TypedTimeDomain | _LiteralTimeDomain + + +def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: + if isinstance(domain, TypedTimeDomain): + return domain + if not isinstance(domain, str): + raise TypeError(f"timestamp type {domain} must be a string or an instance of one of: {TypedTimeDomain}") + if domain not in _str_to_type: + raise ValueError(f"string time domains must be one of: {_str_to_type.keys()}") + if domain.startswith("relative_"): + # see TODO in class Relative if we want to deprecate implicit offset=None + pass + return _str_to_type[domain] + + +def _to_conjure_ingest_api(domain: TypedTimeDomain) -> 
ingest_api.TimestampType: + if isinstance(domain, Iso8601): + return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(iso8601=ingest_api.Iso8601Timestamp())) + if isinstance(domain, Epoch): + epoch = ingest_api.EpochTimestamp(time_unit=domain.unit) + return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(epoch_of_time_unit=epoch)) + if isinstance(domain, Custom): + fmt = ingest_api.CustomTimestamp(format=domain.format, default_year=domain.default_year) + return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(custom_format=fmt)) + if isinstance(domain, Relative): + relative = ingest_api.RelativeTimestamp(time_unit=domain.unit, offset=domain.offset) + return ingest_api.TimestampType(relative=relative) + raise TypeError(f"invalid time domain type: {type(domain)}") + + +_str_to_type: Mapping[_LiteralTimeDomain, Iso8601 | Epoch | Relative] = MappingProxyType( + { + "iso_8601": ISO_8601, + "epoch_nanoseconds": EPOCH_NANOSECONDS, + "epoch_microseconds": EPOCH_MICROSECONDS, + "epoch_milliseconds": EPOCH_MILLISECONDS, + "epoch_seconds": EPOCH_SECONDS, + "epoch_minutes": EPOCH_MINUTES, + "epoch_hours": EPOCH_HOURS, + "relative_nanoseconds": RELATIVE_NANOSECONDS, + "relative_microseconds": RELATIVE_MICROSECONDS, + "relative_milliseconds": RELATIVE_MILLISECONDS, + "relative_seconds": RELATIVE_SECONDS, + "relative_minutes": RELATIVE_MINUTES, + "relative_hours": RELATIVE_HOURS, + } +) + +""" +Exploration: + +# winner: mix of strings + types, but advertise with singletons +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Relative("nanoseconds", offset=15)) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Custom(r"yyyy-MM-dd[T]hh:mm:ss")) + +# current: mix of strings + types: relative 
offsets not supported yet, would need to create a new type for it +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", "iso_8601") +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", "epoch_nanoseconds") +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", "relative_nanoseconds") +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.CustomTimestampFormat(r"yyyy-MM-dd[T]hh:mm:ss")) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.RelativeTimestampFormat("nanoseconds", offset=15)) + +# strongly typed: types and docstrings are extremely clear, allows more flexibility per-type +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Iso8601()) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Absolute("ns")) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Relative("nanoseconds", offset=15)) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Custom(r"yyyy-MM-dd[T]hh:mm:ss")) + +# flexible factory function: multiple behaviors depending on how you call it; difficult to document but easy to read +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("iso_8601")) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("absolute", "ns")) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("relative", "ns", offset=15)) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("custom", r"yyyy-MM-dd[T]hh:mm:ss")) + +# mix of strings + types, but advertise with singletons +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Relative("nanoseconds", offset=15)) +dataset = 
upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Custom(r"yyyy-MM-dd[T]hh:mm:ss")) + +# factory functions for each type - pretty chill but wordier than strings +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601()) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds()) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.relative_nanoseconds(offset=15)) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.custom(r"yyyy-MM-dd[T]hh:mm:ss")) + +# singletons (with transforming class methods) - too magic, custom() behaves differently +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.relative_nanoseconds) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.relative_nanoseconds.offset(15)) +dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.custom(r"yyyy-MM-dd[T]hh:mm:ss")) +""" From fca2efd9431a658a8e0f3f63c9f38c911cf11c9c Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 13:06:57 -0400 Subject: [PATCH 02/51] add time domain impl --- nominal/timedomain.py | 154 +++++++----------------------------------- 1 file changed, 23 insertions(+), 131 deletions(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 88968472..35e0341e 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -1,63 +1,10 @@ -""" -# _AnyTimeDomain values - -# _LiteralTimeDomain -"iso_8601" -"epoch_nanoseconds" -"epoch_microseconds" -"epoch_milliseconds" -"epoch_seconds" -"epoch_minutes" -"epoch_hours" -"relative_nanoseconds" # <-- should we allow implicit offset=None? 
-"relative_microseconds" # <-- should we allow implicit offset=None? -"relative_milliseconds" # <-- should we allow implicit offset=None? -"relative_seconds" # <-- should we allow implicit offset=None? -"relative_minutes" # <-- should we allow implicit offset=None? -"relative_hours" # <-- should we allow implicit offset=None? - -# TypedTimeDomain constants - are these useful? -ISO_8601 -EPOCH_NANOSECONDS -EPOCH_MICROSECONDS -EPOCH_MILLISECONDS -EPOCH_SECONDS -EPOCH_MINUTES -EPOCH_HOURS -RELATIVE_NANOSECONDS # <-- should we allow implicit offset=None? -RELATIVE_MICROSECONDS # <-- should we allow implicit offset=None? -RELATIVE_MILLISECONDS # <-- should we allow implicit offset=None? -RELATIVE_SECONDS # <-- should we allow implicit offset=None? -RELATIVE_MINUTES # <-- should we allow implicit offset=None? -RELATIVE_HOURS # <-- should we allow implicit offset=None? - -# TypedTimeDomain -Epoch("nanoseconds") -Epoch("microseconds") -Epoch("milliseconds") -Epoch("seconds") -Epoch("minutes") -Epoch("hours") -Relative("nanoseconds") # <-- should we allow implicit offset=None? -Relative("microseconds") # <-- should we allow implicit offset=None? -Relative("milliseconds") # <-- should we allow implicit offset=None? -Relative("seconds") # <-- should we allow implicit offset=None? -Relative("minutes") # <-- should we allow implicit offset=None? -Relative("hours") # <-- should we allow implicit offset=None? 
-Relative("nanoseconds", offset=15) -Relative("microseconds", offset=15) -Relative("milliseconds", offset=15) -Relative("seconds", offset=15) -Relative("minutes", offset=15) -Relative("hours", offset=15) -Custom(r"yyyy-MM-dd[T]hh:mm:ss") -Custom(r"MM-dd[T]hh:mm:ss", default_year=2024) -""" +""" """ from __future__ import annotations from dataclasses import dataclass from types import MappingProxyType from typing import Literal, Mapping +import warnings from typing_extensions import TypeAlias from nominal._api.combined import ingest_api @@ -77,13 +24,10 @@ class Epoch: @dataclass(frozen=True) class Relative: unit: _LiteralTimeUnit - offset: int | None = None - """Offset from the beginning of a data collection. The offset must be in the same units as the timestamp type itself.""" - # TODO(alkasm): is ^ true or is it just nanoseconds? - # TODO(alkasm): may be okay with offset=0, but may need to detect presence? - # the backend allows for None on the first upload, but may error on the second one - # but if we default to 0, it may always work and overwrite - so None helps prevent? - # or we can just disallow not specifying the offset + offset: int + """The time offset from the beginning of a data collection.""" + offset_units: _LiteralTimeUnit | None = None + """The units of the offset. 
If None, assumes the same units as the relative timestamp `unit`.""" @dataclass(frozen=True) @@ -99,12 +43,6 @@ class Custom: EPOCH_SECONDS = Epoch("seconds") EPOCH_MINUTES = Epoch("minutes") EPOCH_HOURS = Epoch("hours") -RELATIVE_NANOSECONDS = Relative("nanoseconds") -RELATIVE_MICROSECONDS = Relative("microseconds") -RELATIVE_MILLISECONDS = Relative("milliseconds") -RELATIVE_SECONDS = Relative("seconds") -RELATIVE_MINUTES = Relative("minutes") -RELATIVE_HOURS = Relative("hours") _LiteralTimeUnit: TypeAlias = Literal[ "nanoseconds", @@ -125,7 +63,7 @@ class Custom: "epoch_hours", ] -_LiteralRelative: TypeAlias = Literal[ +_LiteralRelativeDeprecated: TypeAlias = Literal[ "relative_nanoseconds", "relative_microseconds", "relative_milliseconds", @@ -134,10 +72,8 @@ class Custom: "relative_hours", ] - TypedTimeDomain: TypeAlias = Iso8601 | Epoch | Relative | Custom -_LiteralTimeDomain: TypeAlias = _LiteralAbsolute | _LiteralRelative -_AnyTimeDomain: TypeAlias = TypedTimeDomain | _LiteralTimeDomain +_AnyTimeDomain: TypeAlias = TypedTimeDomain | _LiteralAbsolute | _LiteralRelativeDeprecated def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: @@ -145,11 +81,15 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: return domain if not isinstance(domain, str): raise TypeError(f"timestamp type {domain} must be a string or an instance of one of: {TypedTimeDomain}") + if domain.startswith("relative_"): + warnings.warn( + "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.timedomain.Relative` instead. " + "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', offset=0)`. 
", + "until this is removed, we implicitly assume offset=0.", + UserWarning, + ) if domain not in _str_to_type: raise ValueError(f"string time domains must be one of: {_str_to_type.keys()}") - if domain.startswith("relative_"): - # see TODO in class Relative if we want to deprecate implicit offset=None - pass return _str_to_type[domain] @@ -168,7 +108,7 @@ def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: raise TypeError(f"invalid time domain type: {type(domain)}") -_str_to_type: Mapping[_LiteralTimeDomain, Iso8601 | Epoch | Relative] = MappingProxyType( +_str_to_type: Mapping[_LiteralAbsolute | _LiteralRelativeDeprecated, Iso8601 | Epoch | Relative] = MappingProxyType( { "iso_8601": ISO_8601, "epoch_nanoseconds": EPOCH_NANOSECONDS, @@ -176,60 +116,12 @@ def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: "epoch_milliseconds": EPOCH_MILLISECONDS, "epoch_seconds": EPOCH_SECONDS, "epoch_minutes": EPOCH_MINUTES, - "epoch_hours": EPOCH_HOURS, - "relative_nanoseconds": RELATIVE_NANOSECONDS, - "relative_microseconds": RELATIVE_MICROSECONDS, - "relative_milliseconds": RELATIVE_MILLISECONDS, - "relative_seconds": RELATIVE_SECONDS, - "relative_minutes": RELATIVE_MINUTES, - "relative_hours": RELATIVE_HOURS, + "relative_hours": EPOCH_HOURS, + "relative_nanoseconds": Relative("nanoseconds", offset=0), + "relative_microseconds": Relative("microseconds", offset=0), + "relative_milliseconds": Relative("milliseconds", offset=0), + "relative_seconds": Relative("seconds", offset=0), + "relative_minutes": Relative("minutes", offset=0), + "relative_hours": Relative("hours", offset=0), } ) - -""" -Exploration: - -# winner: mix of strings + types, but advertise with singletons -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds) -dataset = upload_csv("path/to/file.csv", "dataset", 
"timestamp", nm.time_domain.Relative("nanoseconds", offset=15)) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Custom(r"yyyy-MM-dd[T]hh:mm:ss")) - -# current: mix of strings + types: relative offsets not supported yet, would need to create a new type for it -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", "iso_8601") -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", "epoch_nanoseconds") -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", "relative_nanoseconds") -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.CustomTimestampFormat(r"yyyy-MM-dd[T]hh:mm:ss")) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.RelativeTimestampFormat("nanoseconds", offset=15)) - -# strongly typed: types and docstrings are extremely clear, allows more flexibility per-type -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Iso8601()) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Absolute("ns")) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Relative("nanoseconds", offset=15)) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Custom(r"yyyy-MM-dd[T]hh:mm:ss")) - -# flexible factory function: multiple behaviors depending on how you call it; difficult to document but easy to read -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("iso_8601")) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("absolute", "ns")) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("relative", "ns", offset=15)) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain("custom", r"yyyy-MM-dd[T]hh:mm:ss")) - -# mix of strings + types, but advertise with singletons -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601) 
-dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Relative("nanoseconds", offset=15)) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.Custom(r"yyyy-MM-dd[T]hh:mm:ss")) - -# factory functions for each type - pretty chill but wordier than strings -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601()) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds()) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.relative_nanoseconds(offset=15)) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.custom(r"yyyy-MM-dd[T]hh:mm:ss")) - -# singletons (with transforming class methods) - too magic, custom() behaves differently -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.iso_8601) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.absolute_nanoseconds) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.relative_nanoseconds) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.relative_nanoseconds.offset(15)) -dataset = upload_csv("path/to/file.csv", "dataset", "timestamp", nm.time_domain.custom(r"yyyy-MM-dd[T]hh:mm:ss")) -""" From d62bd8e1589a7431dc4d91e697733ceaeb79bdd8 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 17:24:59 -0400 Subject: [PATCH 03/51] fix relative offset handling --- nominal/timedomain.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 35e0341e..d4f6732f 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -2,9 +2,11 @@ from __future__ import annotations from dataclasses import dataclass 
+from datetime import datetime from types import MappingProxyType from typing import Literal, Mapping import warnings +import numpy as np from typing_extensions import TypeAlias from nominal._api.combined import ingest_api @@ -24,10 +26,8 @@ class Epoch: @dataclass(frozen=True) class Relative: unit: _LiteralTimeUnit - offset: int - """The time offset from the beginning of a data collection.""" - offset_units: _LiteralTimeUnit | None = None - """The units of the offset. If None, assumes the same units as the relative timestamp `unit`.""" + start: datetime | IntegralNanosecondsUTC + """The starting time to which all relatives times are relative to.""" @dataclass(frozen=True) @@ -84,8 +84,8 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: if domain.startswith("relative_"): warnings.warn( "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.timedomain.Relative` instead. " - "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', offset=0)`. ", - "until this is removed, we implicitly assume offset=0.", + "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', offset=0)`. " + "until this is removed, we implicitly assume offset=None.", UserWarning, ) if domain not in _str_to_type: @@ -94,20 +94,41 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: + """ + Note: datetimes are serialized a ISO-8601 strings, with up-to nanosecond precision. 
Ref: + - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types + - https://github.com/palantir/conjure/pull/1643 + """ + if isinstance(domain, Iso8601): return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(iso8601=ingest_api.Iso8601Timestamp())) if isinstance(domain, Epoch): - epoch = ingest_api.EpochTimestamp(time_unit=domain.unit) + epoch = ingest_api.EpochTimestamp(time_unit=_time_unit_to_conjure(domain.unit)) return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(epoch_of_time_unit=epoch)) if isinstance(domain, Custom): fmt = ingest_api.CustomTimestamp(format=domain.format, default_year=domain.default_year) return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(custom_format=fmt)) if isinstance(domain, Relative): - relative = ingest_api.RelativeTimestamp(time_unit=domain.unit, offset=domain.offset) + relative = ingest_api.RelativeTimestamp( + time_unit=_time_unit_to_conjure(domain.unit), offset=_flexible_to_iso8601(domain.start) + ) return ingest_api.TimestampType(relative=relative) raise TypeError(f"invalid time domain type: {type(domain)}") +def _flexible_to_iso8601(ts: datetime | IntegralNanosecondsUTC) -> str: + """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" + if isinstance(ts, datetime): + return ts.isoformat() + if isinstance(ts, int): + return str(np.datetime64(ts, "ns")) + raise TypeError(f"timestamp {ts} must be a datetime or an integer") + + +def _time_unit_to_conjure(unit: _LiteralTimeUnit) -> ingest_api.TimeUnit: + return ingest_api.TimeUnit[unit.upper()] + + _str_to_type: Mapping[_LiteralAbsolute | _LiteralRelativeDeprecated, Iso8601 | Epoch | Relative] = MappingProxyType( { "iso_8601": ISO_8601, From d247c97956f67ca96ab1e2cac1f1d46711a52e30 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 17:26:39 -0400 Subject: [PATCH 04/51] fix offset -> start, and fix key misnaming --- 
nominal/timedomain.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index d4f6732f..27f3aadd 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -137,12 +137,12 @@ def _time_unit_to_conjure(unit: _LiteralTimeUnit) -> ingest_api.TimeUnit: "epoch_milliseconds": EPOCH_MILLISECONDS, "epoch_seconds": EPOCH_SECONDS, "epoch_minutes": EPOCH_MINUTES, - "relative_hours": EPOCH_HOURS, - "relative_nanoseconds": Relative("nanoseconds", offset=0), - "relative_microseconds": Relative("microseconds", offset=0), - "relative_milliseconds": Relative("milliseconds", offset=0), - "relative_seconds": Relative("seconds", offset=0), - "relative_minutes": Relative("minutes", offset=0), - "relative_hours": Relative("hours", offset=0), + "epoch_hours": EPOCH_HOURS, + "relative_nanoseconds": Relative("nanoseconds", start=0), + "relative_microseconds": Relative("microseconds", start=0), + "relative_milliseconds": Relative("milliseconds", start=0), + "relative_seconds": Relative("seconds", start=0), + "relative_minutes": Relative("minutes", start=0), + "relative_hours": Relative("hours", start=0), } ) From c77f85beeef1f3e07f60abf7bcc97383a316b891 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 17:49:35 -0400 Subject: [PATCH 05/51] reorganize time utils, fix timestamp handling --- nominal/_timeutils.py | 87 ++++++++++++++++++++++++++++++ nominal/_utils.py | 107 +------------------------------------ nominal/cli/dataset.py | 12 +---- nominal/cli/run.py | 2 +- nominal/core.py | 28 ++++------ nominal/nominal.py | 52 ++++++++---------- nominal/timedomain.py | 1 + tests/e2e/test_toplevel.py | 20 +++---- 8 files changed, 134 insertions(+), 175 deletions(-) create mode 100644 nominal/_timeutils.py diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py new file mode 100644 index 00000000..0b34177f --- /dev/null +++ b/nominal/_timeutils.py @@ -0,0 +1,87 @@ +from __future__ import 
annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Literal, Union + +import dateutil.parser +from typing_extensions import TypeAlias # typing.TypeAlias in 3.10+ + +from ._api.combined import ingest_api, scout_run_api + +IntegralNanosecondsUTC = int + + +@dataclass +class CustomTimestampFormat: + format: str + default_year: int = 0 + + +# Using Union rather than the "|" operator due to https://github.com/python/mypy/issues/11665. +TimestampColumnType: TypeAlias = Union[ + Literal[ + "iso_8601", + "epoch_days", + "epoch_hours", + "epoch_minutes", + "epoch_seconds", + "epoch_milliseconds", + "epoch_microseconds", + "epoch_nanoseconds", + "relative_days", + "relative_hours", + "relative_minutes", + "relative_seconds", + "relative_milliseconds", + "relative_microseconds", + "relative_nanoseconds", + ], + CustomTimestampFormat, +] + + +def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp: + seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) + return scout_run_api.UtcTimestamp(seconds_since_epoch=seconds, offset_nanoseconds=nanos) + + +def _flexible_time_to_conjure_ingest_api( + timestamp: datetime | IntegralNanosecondsUTC, +) -> ingest_api.UtcTimestamp: + seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) + return ingest_api.UtcTimestamp(seconds_since_epoch=seconds, offset_nanoseconds=nanos) + + +def _flexible_time_to_seconds_nanos( + timestamp: datetime | IntegralNanosecondsUTC, +) -> tuple[int, int]: + if isinstance(timestamp, datetime): + return _datetime_to_seconds_nanos(timestamp) + elif isinstance(timestamp, IntegralNanosecondsUTC): + return divmod(timestamp, 1_000_000_000) + raise TypeError(f"expected {datetime} or {IntegralNanosecondsUTC}, got {type(timestamp)}") + + +def _conjure_time_to_integral_nanoseconds(ts: scout_run_api.UtcTimestamp) -> IntegralNanosecondsUTC: + return ts.seconds_since_epoch * 1_000_000_000 + 
(ts.offset_nanoseconds or 0) + + +def _datetime_to_seconds_nanos(dt: datetime) -> tuple[int, int]: + dt = dt.astimezone(timezone.utc) + seconds = int(dt.timestamp()) + nanos = dt.microsecond * 1000 + return seconds, nanos + + +def _datetime_to_integral_nanoseconds(dt: datetime) -> IntegralNanosecondsUTC: + seconds, nanos = _datetime_to_seconds_nanos(dt) + return seconds * 1_000_000_000 + nanos + + +def _parse_timestamp(ts: str | datetime | IntegralNanosecondsUTC) -> IntegralNanosecondsUTC: + if isinstance(ts, int): + return ts + if isinstance(ts, str): + ts = dateutil.parser.parse(ts) + return _datetime_to_integral_nanoseconds(ts) diff --git a/nominal/_utils.py b/nominal/_utils.py index bef4c10c..c83ae486 100644 --- a/nominal/_utils.py +++ b/nominal/_utils.py @@ -4,119 +4,14 @@ import mimetypes import os from contextlib import contextmanager -from dataclasses import dataclass -from datetime import datetime, timezone from pathlib import Path -from typing import BinaryIO, Iterable, Iterator, Literal, NamedTuple, Type, TypeVar, Union - -import dateutil.parser -from typing_extensions import TypeAlias # typing.TypeAlias in 3.10+ - -from ._api.combined import ingest_api, scout_run_api +from typing import BinaryIO, Iterable, Iterator, NamedTuple, TypeVar logger = logging.getLogger(__name__) -IntegralNanosecondsUTC = int T = TypeVar("T") -@dataclass -class CustomTimestampFormat: - format: str - default_year: int = 0 - - -# Using Union rather than the "|" operator due to https://github.com/python/mypy/issues/11665. 
-TimestampColumnType: TypeAlias = Union[ - Literal[ - "iso_8601", - "epoch_days", - "epoch_hours", - "epoch_minutes", - "epoch_seconds", - "epoch_milliseconds", - "epoch_microseconds", - "epoch_nanoseconds", - "relative_days", - "relative_hours", - "relative_minutes", - "relative_seconds", - "relative_milliseconds", - "relative_microseconds", - "relative_nanoseconds", - ], - CustomTimestampFormat, -] - - -def _timestamp_type_to_conjure_ingest_api( - ts_type: TimestampColumnType, -) -> ingest_api.TimestampType: - if isinstance(ts_type, CustomTimestampFormat): - return ingest_api.TimestampType( - absolute=ingest_api.AbsoluteTimestamp( - custom_format=ingest_api.CustomTimestamp(format=ts_type.format, default_year=ts_type.default_year) - ) - ) - elif ts_type == "iso_8601": - return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(iso8601=ingest_api.Iso8601Timestamp())) - relation, unit = ts_type.split("_", 1) - time_unit = ingest_api.TimeUnit[unit.upper()] - if relation == "epoch": - return ingest_api.TimestampType( - absolute=ingest_api.AbsoluteTimestamp(epoch_of_time_unit=ingest_api.EpochTimestamp(time_unit=time_unit)) - ) - elif relation == "relative": - return ingest_api.TimestampType(relative=ingest_api.RelativeTimestamp(time_unit=time_unit)) - raise ValueError(f"invalid timestamp type: {ts_type}") - - -def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp: - seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) - return scout_run_api.UtcTimestamp(seconds_since_epoch=seconds, offset_nanoseconds=nanos) - - -def _flexible_time_to_conjure_ingest_api( - timestamp: datetime | IntegralNanosecondsUTC, -) -> ingest_api.UtcTimestamp: - seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) - return ingest_api.UtcTimestamp(seconds_since_epoch=seconds, offset_nanoseconds=nanos) - - -def _flexible_time_to_seconds_nanos( - timestamp: datetime | IntegralNanosecondsUTC, -) -> tuple[int, 
int]: - if isinstance(timestamp, datetime): - return _datetime_to_seconds_nanos(timestamp) - elif isinstance(timestamp, IntegralNanosecondsUTC): - return divmod(timestamp, 1_000_000_000) - raise TypeError(f"expected {datetime} or {IntegralNanosecondsUTC}, got {type(timestamp)}") - - -def _conjure_time_to_integral_nanoseconds(ts: scout_run_api.UtcTimestamp) -> IntegralNanosecondsUTC: - return ts.seconds_since_epoch * 1_000_000_000 + (ts.offset_nanoseconds or 0) - - -def _datetime_to_seconds_nanos(dt: datetime) -> tuple[int, int]: - dt = dt.astimezone(timezone.utc) - seconds = int(dt.timestamp()) - nanos = dt.microsecond * 1000 - return seconds, nanos - - -def _datetime_to_integral_nanoseconds(dt: datetime) -> IntegralNanosecondsUTC: - seconds, nanos = _datetime_to_seconds_nanos(dt) - return seconds * 1_000_000_000 + nanos - - -def _parse_timestamp(ts: str | datetime | IntegralNanosecondsUTC) -> IntegralNanosecondsUTC: - if isinstance(ts, int): - return ts - if isinstance(ts, str): - ts = dateutil.parser.parse(ts) - return _datetime_to_integral_nanoseconds(ts) - - def construct_user_agent_string() -> str: """Constructs a user-agent string with system & Python metadata. 
E.g.: nominal-python/1.0.0b0 (macOS-14.4-arm64-arm-64bit) cpython/3.12.4 diff --git a/nominal/cli/dataset.py b/nominal/cli/dataset.py index 1da4f392..ed621053 100644 --- a/nominal/cli/dataset.py +++ b/nominal/cli/dataset.py @@ -6,6 +6,7 @@ from ..nominal import _upload_csv from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client +from ..timedomain import _LiteralAbsolute @click.group(name="dataset") @@ -43,16 +44,7 @@ def upload_csv( name: str, file: str, timestamp_column: str, - timestamp_type: Literal[ - "iso_8601", - "epoch_days", - "epoch_hours", - "epoch_minutes", - "epoch_seconds", - "epoch_milliseconds", - "epoch_microseconds", - "epoch_nanoseconds", - ], + timestamp_type: _LiteralAbsolute, desc: str | None, wait: bool, base_url: str, diff --git a/nominal/cli/run.py b/nominal/cli/run.py index 4918bd24..a1c85075 100644 --- a/nominal/cli/run.py +++ b/nominal/cli/run.py @@ -4,7 +4,7 @@ import click -from .._utils import _parse_timestamp +from .._timeutils import _parse_timestamp from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client diff --git a/nominal/core.py b/nominal/core.py index b73e02db..18fca953 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -15,6 +15,7 @@ from typing_extensions import Self # typing.Self in 3.11+ from nominal import _config +from .timedomain import TypedTimeDomain, _to_conjure_ingest_api from ._api.combined import ( attachments_api, @@ -27,19 +28,14 @@ upload_api, ) from ._multipart import put_multipart_upload -from ._utils import ( +from ._timeutils import ( CustomTimestampFormat, - FileType, - FileTypes, IntegralNanosecondsUTC, - TimestampColumnType, _conjure_time_to_integral_nanoseconds, _flexible_time_to_conjure_ingest_api, _flexible_time_to_conjure_scout_run_api, - _timestamp_type_to_conjure_ingest_api, - construct_user_agent_string, - update_dataclass, ) +from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed __all__ 
= [ @@ -250,7 +246,7 @@ def update( update_dataclass(self, dataset, fields=self.__dataclass_fields__) return self - def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: TimestampColumnType) -> None: + def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: TypedTimeDomain) -> None: """Append to a dataset from a csv on-disk.""" path, file_type = _verify_csv_path(path) with open(path, "rb") as csv_file: @@ -260,7 +256,7 @@ def add_to_dataset_from_io( self, dataset: BinaryIO, timestamp_column: str, - timestamp_type: TimestampColumnType, + timestamp_type: TypedTimeDomain, file_type: tuple[str, str] | FileType = FileTypes.CSV, ) -> None: """Append to a dataset from a file-like object. @@ -268,12 +264,6 @@ def add_to_dataset_from_io( file_type: a (extension, mimetype) pair describing the type of file. """ - if not isinstance(timestamp_type, CustomTimestampFormat): - if timestamp_type.startswith("relative"): - raise ValueError( - "multifile datasets with relative timestamps are not yet supported by the client library" - ) - if isinstance(dataset, TextIOBase): raise TypeError(f"dataset {dataset!r} must be open in binary mode, rather than text mode") @@ -293,7 +283,7 @@ def add_to_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=_timestamp_type_to_conjure_ingest_api(timestamp_type), + timestamp_type=_to_conjure_ingest_api(timestamp_type), ), ), ) @@ -592,7 +582,7 @@ def create_csv_dataset( path: Path | str, name: str | None, timestamp_column: str, - timestamp_type: TimestampColumnType, + timestamp_type: TypedTimeDomain, description: str | None = None, *, labels: Sequence[str] = (), @@ -624,7 +614,7 @@ def create_dataset_from_io( dataset: BinaryIO, name: str, timestamp_column: str, - timestamp_type: TimestampColumnType, + timestamp_type: TypedTimeDomain, file_type: tuple[str, str] | FileType = 
FileTypes.CSV, description: str | None = None, *, @@ -664,7 +654,7 @@ def create_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=_timestamp_type_to_conjure_ingest_api(timestamp_type), + timestamp_type=_to_conjure_ingest_api(timestamp_type), ), ), ) diff --git a/nominal/nominal.py b/nominal/nominal.py index deab2e88..eb3567e1 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -4,18 +4,12 @@ from functools import cache from pathlib import Path from threading import Thread -from typing import TYPE_CHECKING, BinaryIO, Literal +from typing import TYPE_CHECKING, BinaryIO from nominal import _config -from ._utils import ( - CustomTimestampFormat, - FileType, - FileTypes, - IntegralNanosecondsUTC, - _parse_timestamp, - reader_writer, -) +from ._timeutils import CustomTimestampFormat, IntegralNanosecondsUTC, _parse_timestamp +from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video from . import timedomain @@ -212,14 +206,13 @@ def create_run_csv( column. """ ... 
- raise RuntimeError("fix this") - try: - start, end = _get_start_end_timestamp_csv_file(file, timestamp_column, timestamp_type) - except ValueError as e: + typed_timestamp_type = timedomain._make_typed_time_domain(timestamp_type) + if not isinstance(typed_timestamp_type, (timedomain.Iso8601, timedomain.Epoch)): raise ValueError( - "`create_run_csv()` only supports absolute timestamps: use `upload_dataset()` and `create_run()` instead" - ) from e - dataset = upload_csv(file, f"Dataset for Run: {name}", timestamp_column, timestamp_type) + "`create_run_csv()` only supports iso8601 or epoch timestamps: use `upload_dataset()` and `create_run()` instead" + ) + start, end = _get_start_end_timestamp_csv_file(file, timestamp_column, typed_timestamp_type) + dataset = upload_csv(file, f"Dataset for Run: {name}", timestamp_column, typed_timestamp_type) run = create_run(name, start=start, end=end, description=description) run.add_dataset("dataset", dataset) return run @@ -303,32 +296,33 @@ def get_video(rid: str) -> Video: def _get_start_end_timestamp_csv_file( - file: Path | str, timestamp_column: str, timestamp_type: timedomain._AnyTimeDomain + file: Path | str, + timestamp_column: str, + timestamp_type: timedomain.Iso8601 | timedomain.Epoch, ) -> tuple[IntegralNanosecondsUTC, IntegralNanosecondsUTC]: import pandas as pd df = pd.read_csv(file) ts_col = df[timestamp_column] - if isinstance(timestamp_type, CustomTimestampFormat) or timestamp_type.startswith("relative_"): - raise ValueError("timestamp_type must be 'iso_8601' or 'epoch_{unit}'") - if timestamp_type == "iso_8601": + + if isinstance(timestamp_type, timedomain.Iso8601): ts_col = pd.to_datetime(ts_col) - else: - _, unit = timestamp_type.split("_") - pd_units = { - "days": "D", + elif isinstance(timestamp_type, timedomain.Epoch): + pd_units: dict[timedomain._LiteralTimeUnit, str] = { + "hours": "s", # hours are not supported by pandas + "minutes": "s", # minutes are not supported by pandas "seconds": "s", 
"milliseconds": "ms", "microseconds": "us", "nanoseconds": "ns", } - if unit == "hours": - unit = "seconds" + if timestamp_type.unit == "hours": ts_col *= 60 * 60 - elif unit == "minutes": - unit = "seconds" + elif timestamp_type.unit == "minutes": ts_col *= 60 - ts_col = pd.to_datetime(ts_col, unit=pd_units[unit]) + ts_col = pd.to_datetime(ts_col, unit=pd_units[timestamp_type.unit]) + else: + raise ValueError(f"unhandled timestamp type {timestamp_type}") start, end = ts_col.min(), ts_col.max() return ( diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 27f3aadd..1475c3a2 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -73,6 +73,7 @@ class Custom: ] TypedTimeDomain: TypeAlias = Iso8601 | Epoch | Relative | Custom +_AbsoluteTimeDomain: TypeAlias = Iso8601 | Epoch | Custom | _LiteralAbsolute _AnyTimeDomain: TypeAlias = TypedTimeDomain | _LiteralAbsolute | _LiteralRelativeDeprecated diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index 9dd763e6..a69fc510 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -7,7 +7,7 @@ import polars as pl import nominal as nm -from nominal import _utils +from nominal import _timeutils from . 
import _create_random_start_end @@ -112,8 +112,8 @@ def test_create_run(): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == _utils._datetime_to_integral_nanoseconds(start) - assert run.end == _utils._datetime_to_integral_nanoseconds(end) + assert run.start == _timeutils._datetime_to_integral_nanoseconds(start) + assert run.end == _timeutils._datetime_to_integral_nanoseconds(end) assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -130,8 +130,8 @@ def test_create_run_csv(csv_data): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == _utils._datetime_to_integral_nanoseconds(start) - assert run.end == _utils._datetime_to_integral_nanoseconds(end) + assert run.start == _timeutils._datetime_to_integral_nanoseconds(start) + assert run.end == _timeutils._datetime_to_integral_nanoseconds(end) assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -156,8 +156,8 @@ def test_get_run(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == _utils._parse_timestamp(start) - assert run2.end == run.end == _utils._parse_timestamp(end) + assert run2.start == run.start == _timeutils._parse_timestamp(start) + assert run2.end == run.end == _timeutils._parse_timestamp(end) assert run2.properties == run.properties == {} assert run2.labels == run.labels == () @@ -175,8 +175,8 @@ def test_search_runs(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == _utils._parse_timestamp(start) - assert run2.end == run.end == _utils._parse_timestamp(end) + assert run2.start == run.start == _timeutils._parse_timestamp(start) + assert run2.end == run.end == _timeutils._parse_timestamp(end) assert run2.properties == run.properties == {} assert run2.labels == run.labels == () @@ -217,7 
+217,7 @@ def test_download_attachment(csv_data): with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): at = nm.upload_attachment("fake_path.csv", at_title, at_desc) - with _utils.reader_writer() as (r, w): + with _timeutils.reader_writer() as (r, w): with mock.patch("builtins.open", return_value=w): nm.download_attachment(at.rid, "fake_path.csv") assert r.read() == csv_data From 4082588928427ae8d2376c3d5e9db4c2ed7c1f53 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 18:09:47 -0400 Subject: [PATCH 06/51] remove extraneous ellipses --- nominal/nominal.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nominal/nominal.py b/nominal/nominal.py index eb3567e1..cfadcedc 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -205,7 +205,6 @@ def create_run_csv( The run start and end times are created from the minimum and maximum timestamps in the CSV file in the timestamp column. """ - ... typed_timestamp_type = timedomain._make_typed_time_domain(timestamp_type) if not isinstance(typed_timestamp_type, (timedomain.Iso8601, timedomain.Epoch)): raise ValueError( From ff86534f079e47fd29baaa549592ca3cebac3f94 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 18:10:01 -0400 Subject: [PATCH 07/51] fmt fix --- nominal/cli/dataset.py | 2 +- nominal/core.py | 2 +- nominal/nominal.py | 2 +- nominal/timedomain.py | 5 ++++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/nominal/cli/dataset.py b/nominal/cli/dataset.py index ed621053..f3366a0a 100644 --- a/nominal/cli/dataset.py +++ b/nominal/cli/dataset.py @@ -5,8 +5,8 @@ import click from ..nominal import _upload_csv -from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client from ..timedomain import _LiteralAbsolute +from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client @click.group(name="dataset") diff --git a/nominal/core.py b/nominal/core.py index 18fca953..1a95aa7a 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -15,7 +15,6 @@ 
from typing_extensions import Self # typing.Self in 3.11+ from nominal import _config -from .timedomain import TypedTimeDomain, _to_conjure_ingest_api from ._api.combined import ( attachments_api, @@ -37,6 +36,7 @@ ) from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed +from .timedomain import TypedTimeDomain, _to_conjure_ingest_api __all__ = [ "NominalClient", diff --git a/nominal/nominal.py b/nominal/nominal.py index cfadcedc..74670a53 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -8,10 +8,10 @@ from nominal import _config +from . import timedomain from ._timeutils import CustomTimestampFormat, IntegralNanosecondsUTC, _parse_timestamp from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video -from . import timedomain if TYPE_CHECKING: import pandas as pd diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 1475c3a2..29faba09 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -1,13 +1,16 @@ """ """ from __future__ import annotations + +import warnings from dataclasses import dataclass from datetime import datetime from types import MappingProxyType from typing import Literal, Mapping -import warnings + import numpy as np from typing_extensions import TypeAlias + from nominal._api.combined import ingest_api IntegralNanosecondsUTC: TypeAlias = int From 588ad32f29dbcb004dfc8fcb41ff56d8ed87d70e Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 18:16:54 -0400 Subject: [PATCH 08/51] remove old types --- nominal/_timeutils.py | 32 -------------------------------- nominal/core.py | 2 -- nominal/nominal.py | 2 +- nominal/timedomain.py | 5 ++--- 4 files changed, 3 insertions(+), 38 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index 0b34177f..bb27cd59 100644 --- a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -1,46 +1,14 
@@ from __future__ import annotations -from dataclasses import dataclass from datetime import datetime, timezone -from typing import Literal, Union import dateutil.parser -from typing_extensions import TypeAlias # typing.TypeAlias in 3.10+ from ._api.combined import ingest_api, scout_run_api IntegralNanosecondsUTC = int -@dataclass -class CustomTimestampFormat: - format: str - default_year: int = 0 - - -# Using Union rather than the "|" operator due to https://github.com/python/mypy/issues/11665. -TimestampColumnType: TypeAlias = Union[ - Literal[ - "iso_8601", - "epoch_days", - "epoch_hours", - "epoch_minutes", - "epoch_seconds", - "epoch_milliseconds", - "epoch_microseconds", - "epoch_nanoseconds", - "relative_days", - "relative_hours", - "relative_minutes", - "relative_seconds", - "relative_milliseconds", - "relative_microseconds", - "relative_nanoseconds", - ], - CustomTimestampFormat, -] - - def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp: seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) return scout_run_api.UtcTimestamp(seconds_since_epoch=seconds, offset_nanoseconds=nanos) diff --git a/nominal/core.py b/nominal/core.py index 1a95aa7a..7df66f7a 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -28,7 +28,6 @@ ) from ._multipart import put_multipart_upload from ._timeutils import ( - CustomTimestampFormat, IntegralNanosecondsUTC, _conjure_time_to_integral_nanoseconds, _flexible_time_to_conjure_ingest_api, @@ -45,7 +44,6 @@ "Attachment", "Video", "IntegralNanosecondsUTC", - "CustomTimestampFormat", ] diff --git a/nominal/nominal.py b/nominal/nominal.py index 74670a53..e2259101 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -9,7 +9,7 @@ from nominal import _config from . 
import timedomain -from ._timeutils import CustomTimestampFormat, IntegralNanosecondsUTC, _parse_timestamp +from ._timeutils import IntegralNanosecondsUTC, _parse_timestamp from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 29faba09..b1cb1faf 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -11,9 +11,8 @@ import numpy as np from typing_extensions import TypeAlias -from nominal._api.combined import ingest_api - -IntegralNanosecondsUTC: TypeAlias = int +from ._api.combined import ingest_api +from ._timeutils import IntegralNanosecondsUTC @dataclass(frozen=True) From c71d8cf5c9b43631b2744f37ae670d0de09fdc13 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 22:12:13 -0400 Subject: [PATCH 09/51] fixed warning example --- nominal/timedomain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index b1cb1faf..e90b34c7 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -87,7 +87,7 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: if domain.startswith("relative_"): warnings.warn( "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.timedomain.Relative` instead. " - "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', offset=0)`. " + "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', start=datetime.now())`. 
" "until this is removed, we implicitly assume offset=None.", UserWarning, ) From 7e808a20bcf367f1ded47eb23aa210e585a48664 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 22:13:21 -0400 Subject: [PATCH 10/51] move part of warning into code comment --- nominal/timedomain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index e90b34c7..2d5202e6 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -85,10 +85,10 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: if not isinstance(domain, str): raise TypeError(f"timestamp type {domain} must be a string or an instance of one of: {TypedTimeDomain}") if domain.startswith("relative_"): + # until this is completely removed, we implicitly assume offset=None in the APIs warnings.warn( "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.timedomain.Relative` instead. " - "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', start=datetime.now())`. " - "until this is removed, we implicitly assume offset=None.", + "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', start=datetime.now())`. 
", UserWarning, ) if domain not in _str_to_type: From 8fb1d86f45d4e3269ba822c59718c94a1116f6dd Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 22:21:42 -0400 Subject: [PATCH 11/51] remove unused type --- nominal/timedomain.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 2d5202e6..4751c601 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from datetime import datetime from types import MappingProxyType -from typing import Literal, Mapping +from typing import Literal, Mapping, Union import numpy as np from typing_extensions import TypeAlias @@ -75,8 +75,7 @@ class Custom: ] TypedTimeDomain: TypeAlias = Iso8601 | Epoch | Relative | Custom -_AbsoluteTimeDomain: TypeAlias = Iso8601 | Epoch | Custom | _LiteralAbsolute -_AnyTimeDomain: TypeAlias = TypedTimeDomain | _LiteralAbsolute | _LiteralRelativeDeprecated +_AnyTimeDomain: TypeAlias = Union[TypedTimeDomain, _LiteralAbsolute, _LiteralRelativeDeprecated] def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: From a4e8daa599cbcfa91011fb18ad9116f0a3743c07 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 23 Sep 2024 22:36:13 -0400 Subject: [PATCH 12/51] python 3.9 support --- nominal/timedomain.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 4751c601..5f8c69a3 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -1,5 +1,3 @@ -""" """ - from __future__ import annotations import warnings @@ -74,12 +72,12 @@ class Custom: "relative_hours", ] -TypedTimeDomain: TypeAlias = Iso8601 | Epoch | Relative | Custom +TypedTimeDomain: TypeAlias = Union[Iso8601, Epoch, Relative, Custom] _AnyTimeDomain: TypeAlias = Union[TypedTimeDomain, _LiteralAbsolute, _LiteralRelativeDeprecated] def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: - 
if isinstance(domain, TypedTimeDomain): + if isinstance(domain, (Iso8601, Epoch, Relative, Custom)): return domain if not isinstance(domain, str): raise TypeError(f"timestamp type {domain} must be a string or an instance of one of: {TypedTimeDomain}") From 3f2d7e571e39f02058a6809b9dc040902fd5189b Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 09:03:16 -0400 Subject: [PATCH 13/51] warning format --- nominal/timedomain.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 5f8c69a3..3175cf16 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -85,7 +85,7 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: # until this is completely removed, we implicitly assume offset=None in the APIs warnings.warn( "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.timedomain.Relative` instead. " - "for example: instead of 'relative_seconds', `use nm.timedomain.Relative('seconds', start=datetime.now())`. ", + "for example: instead of 'relative_seconds', use `nm.timedomain.Relative('seconds', start=datetime.now())`. 
", UserWarning, ) if domain not in _str_to_type: @@ -112,6 +112,7 @@ def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: relative = ingest_api.RelativeTimestamp( time_unit=_time_unit_to_conjure(domain.unit), offset=_flexible_to_iso8601(domain.start) ) + print(relative) return ingest_api.TimestampType(relative=relative) raise TypeError(f"invalid time domain type: {type(domain)}") From f1fcab24166ae0bd2020fac2f17965e83d0f81c3 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 09:09:07 -0400 Subject: [PATCH 14/51] update tests --- nominal/__init__.py | 1 + nominal/timedomain.py | 8 ++++---- tests/e2e/test_sdk.py | 4 ++-- tests/e2e/test_toplevel.py | 7 ++++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/nominal/__init__.py b/nominal/__init__.py index ef69ae31..98352fec 100644 --- a/nominal/__init__.py +++ b/nominal/__init__.py @@ -1,3 +1,4 @@ +from . import timedomain from .core import Attachment, Dataset, NominalClient, Run, Video from .nominal import ( create_run, diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 3175cf16..ec4a6948 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -2,7 +2,7 @@ import warnings from dataclasses import dataclass -from datetime import datetime +from datetime import datetime, timezone from types import MappingProxyType from typing import Literal, Mapping, Union @@ -112,7 +112,6 @@ def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: relative = ingest_api.RelativeTimestamp( time_unit=_time_unit_to_conjure(domain.unit), offset=_flexible_to_iso8601(domain.start) ) - print(relative) return ingest_api.TimestampType(relative=relative) raise TypeError(f"invalid time domain type: {type(domain)}") @@ -120,9 +119,10 @@ def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: def _flexible_to_iso8601(ts: datetime | IntegralNanosecondsUTC) -> str: """datetime.datetime objects are only 
microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" if isinstance(ts, datetime): - return ts.isoformat() + return ts.astimezone(tz=timezone.utc).isoformat() if isinstance(ts, int): - return str(np.datetime64(ts, "ns")) + # np.datetime64[ns] assumes UTC + return str(np.datetime64(ts, "ns")) + "Z" raise TypeError(f"timestamp {ts} must be a datetime or an integer") diff --git a/tests/e2e/test_sdk.py b/tests/e2e/test_sdk.py index 182df645..5a1b2d8c 100644 --- a/tests/e2e/test_sdk.py +++ b/tests/e2e/test_sdk.py @@ -69,11 +69,11 @@ def test_add_csv_to_dataset(csv_data, csv_data2): desc = f"TESTING sdk to add more data to a dataset {uuid4()}" with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): - ds = nm.upload_csv("fake_path.csv", name, "timestamp", "iso_8601", desc) + ds = nm.upload_csv("fake_path.csv", name, "timestamp", nm.timedomain.ISO_8601, desc) ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) with mock.patch("builtins.open", mock.mock_open(read_data=csv_data2)): - ds.add_csv_to_dataset("fake_path.csv", "timestamp", "iso_8601") + ds.add_csv_to_dataset("fake_path.csv", "timestamp", nm.timedomain.ISO_8601) ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) assert ds.rid != "" diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index a69fc510..02b196a0 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -7,7 +7,7 @@ import polars as pl import nominal as nm -from nominal import _timeutils +from nominal import _timeutils, _utils from . 
import _create_random_start_end @@ -45,9 +45,10 @@ def test_upload_csv_gz(csv_gz_data): def test_upload_csv_relative_timestamp(csv_data): name = f"dataset-{uuid4()}" desc = f"top-level test to create a dataset with relative timestamps {uuid4()}" + start, _ = _create_random_start_end() with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): - ds = nm.upload_csv("fake_path.csv", name, "relative_minutes", "relative_minutes", desc) + ds = nm.upload_csv("fake_path.csv", name, "relative_minutes", nm.timedomain.Relative("minutes", start), desc) ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) assert ds.rid != "" @@ -217,7 +218,7 @@ def test_download_attachment(csv_data): with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): at = nm.upload_attachment("fake_path.csv", at_title, at_desc) - with _timeutils.reader_writer() as (r, w): + with _utils.reader_writer() as (r, w): with mock.patch("builtins.open", return_value=w): nm.download_attachment(at.rid, "fake_path.csv") assert r.read() == csv_data From b70207e6aab21e0dbf3e734baa83227118d305bf Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 09:53:15 -0400 Subject: [PATCH 15/51] move iso8601 formatting to utils --- nominal/_timeutils.py | 11 +++++++++++ nominal/timedomain.py | 15 +++------------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index bb27cd59..ff3d8473 100644 --- a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -3,6 +3,7 @@ from datetime import datetime, timezone import dateutil.parser +import numpy as np from ._api.combined import ingest_api, scout_run_api @@ -53,3 +54,13 @@ def _parse_timestamp(ts: str | datetime | IntegralNanosecondsUTC) -> IntegralNan if isinstance(ts, str): ts = dateutil.parser.parse(ts) return _datetime_to_integral_nanoseconds(ts) + + +def _flexible_to_iso8601(ts: datetime | IntegralNanosecondsUTC) -> str: + """datetime.datetime objects are only 
microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" + if isinstance(ts, datetime): + return ts.astimezone(tz=timezone.utc).isoformat() + if isinstance(ts, int): + # np.datetime64[ns] assumes UTC + return str(np.datetime64(ts, "ns")) + "Z" + raise TypeError(f"timestamp {ts} must be a datetime or an integer") diff --git a/nominal/timedomain.py b/nominal/timedomain.py index ec4a6948..8051b424 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -2,13 +2,14 @@ import warnings from dataclasses import dataclass -from datetime import datetime, timezone +from datetime import datetime from types import MappingProxyType from typing import Literal, Mapping, Union -import numpy as np from typing_extensions import TypeAlias +from nominal._timeutils import _flexible_to_iso8601 + from ._api.combined import ingest_api from ._timeutils import IntegralNanosecondsUTC @@ -116,16 +117,6 @@ def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: raise TypeError(f"invalid time domain type: {type(domain)}") -def _flexible_to_iso8601(ts: datetime | IntegralNanosecondsUTC) -> str: - """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" - if isinstance(ts, datetime): - return ts.astimezone(tz=timezone.utc).isoformat() - if isinstance(ts, int): - # np.datetime64[ns] assumes UTC - return str(np.datetime64(ts, "ns")) + "Z" - raise TypeError(f"timestamp {ts} must be a datetime or an integer") - - def _time_unit_to_conjure(unit: _LiteralTimeUnit) -> ingest_api.TimeUnit: return ingest_api.TimeUnit[unit.upper()] From b2e8afa84261c45ab34919fe109e6202ac05157a Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 09:53:54 -0400 Subject: [PATCH 16/51] more explicitly named function --- nominal/core.py | 6 +++--- nominal/timedomain.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nominal/core.py b/nominal/core.py index 
7df66f7a..e91cbda4 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -35,7 +35,7 @@ ) from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .timedomain import TypedTimeDomain, _to_conjure_ingest_api +from .timedomain import TypedTimeDomain, _time_domain_to_conjure_ingest_api __all__ = [ "NominalClient", @@ -281,7 +281,7 @@ def add_to_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=_to_conjure_ingest_api(timestamp_type), + timestamp_type=_time_domain_to_conjure_ingest_api(timestamp_type), ), ), ) @@ -652,7 +652,7 @@ def create_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=_to_conjure_ingest_api(timestamp_type), + timestamp_type=_time_domain_to_conjure_ingest_api(timestamp_type), ), ), ) diff --git a/nominal/timedomain.py b/nominal/timedomain.py index 8051b424..eb3ae1d7 100644 --- a/nominal/timedomain.py +++ b/nominal/timedomain.py @@ -94,7 +94,7 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: return _str_to_type[domain] -def _to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: +def _time_domain_to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: """ Note: datetimes are serialized a ISO-8601 strings, with up-to nanosecond precision. 
Ref: - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types From 16852d2b88e84b18ae61c83d88388f9b92c7ed87 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:07:32 -0400 Subject: [PATCH 17/51] rename timedomain -> ts --- nominal/__init__.py | 2 +- nominal/cli/dataset.py | 2 +- nominal/core.py | 2 +- nominal/nominal.py | 30 +++++++++++++++--------------- nominal/{timedomain.py => ts.py} | 0 tests/e2e/test_sdk.py | 4 ++-- tests/e2e/test_toplevel.py | 2 +- 7 files changed, 21 insertions(+), 21 deletions(-) rename nominal/{timedomain.py => ts.py} (100%) diff --git a/nominal/__init__.py b/nominal/__init__.py index 98352fec..87a0888b 100644 --- a/nominal/__init__.py +++ b/nominal/__init__.py @@ -1,4 +1,4 @@ -from . import timedomain +from . import ts from .core import Attachment, Dataset, NominalClient, Run, Video from .nominal import ( create_run, diff --git a/nominal/cli/dataset.py b/nominal/cli/dataset.py index f3366a0a..a4e59d33 100644 --- a/nominal/cli/dataset.py +++ b/nominal/cli/dataset.py @@ -5,7 +5,7 @@ import click from ..nominal import _upload_csv -from ..timedomain import _LiteralAbsolute +from ..ts import _LiteralAbsolute from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client diff --git a/nominal/core.py b/nominal/core.py index e91cbda4..27f11253 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -35,7 +35,7 @@ ) from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .timedomain import TypedTimeDomain, _time_domain_to_conjure_ingest_api +from .ts import TypedTimeDomain, _time_domain_to_conjure_ingest_api __all__ = [ "NominalClient", diff --git a/nominal/nominal.py b/nominal/nominal.py index e2259101..031a90e8 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -8,7 +8,7 @@ from nominal import _config -from . import timedomain +from . 
import ts from ._timeutils import IntegralNanosecondsUTC, _parse_timestamp from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video @@ -52,14 +52,14 @@ def upload_pandas( df: pd.DataFrame, name: str, timestamp_column: str, - timestamp_type: timedomain._AnyTimeDomain, + timestamp_type: ts._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: """Create a dataset in the Nominal platform from a pandas.DataFrame.""" conn = get_default_client() - time_domain = timedomain._make_typed_time_domain(timestamp_type) + time_domain = ts._make_typed_time_domain(timestamp_type) # TODO(alkasm): use parquet instead of CSV as an intermediary @@ -89,14 +89,14 @@ def upload_polars( df: pl.DataFrame, name: str, timestamp_column: str, - timestamp_type: timedomain._AnyTimeDomain, + timestamp_type: ts._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: """Create a dataset in the Nominal platform from a polars.DataFrame.""" conn = get_default_client() - time_domain = timedomain._make_typed_time_domain(timestamp_type) + time_domain = ts._make_typed_time_domain(timestamp_type) def write_and_close(df: pl.DataFrame, w: BinaryIO) -> None: df.write_csv(w) @@ -124,7 +124,7 @@ def upload_csv( file: Path | str, name: str | None, timestamp_column: str, - timestamp_type: timedomain._AnyTimeDomain, + timestamp_type: ts._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, @@ -144,12 +144,12 @@ def _upload_csv( file: Path | str, name: str | None, timestamp_column: str, - timestamp_type: timedomain._AnyTimeDomain, + timestamp_type: ts._AnyTimeDomain, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: - time_domain = timedomain._make_typed_time_domain(timestamp_type) + time_domain = ts._make_typed_time_domain(timestamp_type) dataset = conn.create_csv_dataset( file, name, @@ -191,7 +191,7 @@ 
def create_run_csv( file: Path | str, name: str, timestamp_column: str, - timestamp_type: timedomain._LiteralAbsolute | timedomain.Iso8601 | timedomain.Epoch, + timestamp_type: ts._LiteralAbsolute | ts.Iso8601 | ts.Epoch, description: str | None = None, ) -> Run: """Create a dataset from a CSV file, and create a run based on it. @@ -205,8 +205,8 @@ def create_run_csv( The run start and end times are created from the minimum and maximum timestamps in the CSV file in the timestamp column. """ - typed_timestamp_type = timedomain._make_typed_time_domain(timestamp_type) - if not isinstance(typed_timestamp_type, (timedomain.Iso8601, timedomain.Epoch)): + typed_timestamp_type = ts._make_typed_time_domain(timestamp_type) + if not isinstance(typed_timestamp_type, (ts.Iso8601, ts.Epoch)): raise ValueError( "`create_run_csv()` only supports iso8601 or epoch timestamps: use `upload_dataset()` and `create_run()` instead" ) @@ -297,17 +297,17 @@ def get_video(rid: str) -> Video: def _get_start_end_timestamp_csv_file( file: Path | str, timestamp_column: str, - timestamp_type: timedomain.Iso8601 | timedomain.Epoch, + timestamp_type: ts.Iso8601 | ts.Epoch, ) -> tuple[IntegralNanosecondsUTC, IntegralNanosecondsUTC]: import pandas as pd df = pd.read_csv(file) ts_col = df[timestamp_column] - if isinstance(timestamp_type, timedomain.Iso8601): + if isinstance(timestamp_type, ts.Iso8601): ts_col = pd.to_datetime(ts_col) - elif isinstance(timestamp_type, timedomain.Epoch): - pd_units: dict[timedomain._LiteralTimeUnit, str] = { + elif isinstance(timestamp_type, ts.Epoch): + pd_units: dict[ts._LiteralTimeUnit, str] = { "hours": "s", # hours are not supported by pandas "minutes": "s", # minutes are not supported by pandas "seconds": "s", diff --git a/nominal/timedomain.py b/nominal/ts.py similarity index 100% rename from nominal/timedomain.py rename to nominal/ts.py diff --git a/tests/e2e/test_sdk.py b/tests/e2e/test_sdk.py index 5a1b2d8c..a67f5c3e 100644 --- a/tests/e2e/test_sdk.py +++ 
b/tests/e2e/test_sdk.py @@ -69,11 +69,11 @@ def test_add_csv_to_dataset(csv_data, csv_data2): desc = f"TESTING sdk to add more data to a dataset {uuid4()}" with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): - ds = nm.upload_csv("fake_path.csv", name, "timestamp", nm.timedomain.ISO_8601, desc) + ds = nm.upload_csv("fake_path.csv", name, "timestamp", nm.ts.ISO_8601, desc) ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) with mock.patch("builtins.open", mock.mock_open(read_data=csv_data2)): - ds.add_csv_to_dataset("fake_path.csv", "timestamp", nm.timedomain.ISO_8601) + ds.add_csv_to_dataset("fake_path.csv", "timestamp", nm.ts.ISO_8601) ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) assert ds.rid != "" diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index 02b196a0..838b1ca4 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -48,7 +48,7 @@ def test_upload_csv_relative_timestamp(csv_data): start, _ = _create_random_start_end() with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): - ds = nm.upload_csv("fake_path.csv", name, "relative_minutes", nm.timedomain.Relative("minutes", start), desc) + ds = nm.upload_csv("fake_path.csv", name, "relative_minutes", nm.ts.Relative("minutes", start), desc) ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) assert ds.rid != "" From adc07f87b56a8c5b9f61b72781dc5d740440120a Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:10:15 -0400 Subject: [PATCH 18/51] move integral nanoseconds to ts --- nominal/_timeutils.py | 3 +-- nominal/core.py | 3 +-- nominal/nominal.py | 3 ++- nominal/ts.py | 20 +++++++++++++++++++- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index ff3d8473..bf562e25 100644 --- a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -5,10 +5,9 @@ import dateutil.parser import numpy as np +from .ts import 
IntegralNanosecondsUTC from ._api.combined import ingest_api, scout_run_api -IntegralNanosecondsUTC = int - def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp: seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) diff --git a/nominal/core.py b/nominal/core.py index 27f11253..3f08e6d7 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -28,14 +28,13 @@ ) from ._multipart import put_multipart_upload from ._timeutils import ( - IntegralNanosecondsUTC, _conjure_time_to_integral_nanoseconds, _flexible_time_to_conjure_ingest_api, _flexible_time_to_conjure_scout_run_api, ) from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import TypedTimeDomain, _time_domain_to_conjure_ingest_api +from .ts import IntegralNanosecondsUTC, TypedTimeDomain, _time_domain_to_conjure_ingest_api __all__ = [ "NominalClient", diff --git a/nominal/nominal.py b/nominal/nominal.py index 031a90e8..cc161195 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -7,9 +7,10 @@ from typing import TYPE_CHECKING, BinaryIO from nominal import _config +from .ts import IntegralNanosecondsUTC from . 
import ts -from ._timeutils import IntegralNanosecondsUTC, _parse_timestamp +from ._timeutils import _parse_timestamp from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video diff --git a/nominal/ts.py b/nominal/ts.py index eb3ae1d7..233e21ae 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -11,7 +11,25 @@ from nominal._timeutils import _flexible_to_iso8601 from ._api.combined import ingest_api -from ._timeutils import IntegralNanosecondsUTC + +__all__ = [ + "Iso8601", + "Epoch", + "Relative", + "Custom", + "ISO_8601", + "EPOCH_NANOSECONDS", + "EPOCH_MICROSECONDS", + "EPOCH_MILLISECONDS", + "EPOCH_SECONDS", + "EPOCH_MINUTES", + "EPOCH_HOURS", + "TypedTimeDomain", + "IntegralNanosecondsUTC", +] + + +IntegralNanosecondsUTC = int @dataclass(frozen=True) From f70135ec3d74e575988a7e1edd563bc478b691e6 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:10:30 -0400 Subject: [PATCH 19/51] fix --- nominal/_timeutils.py | 2 +- nominal/nominal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index bf562e25..0f8eb60f 100644 --- a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -5,8 +5,8 @@ import dateutil.parser import numpy as np -from .ts import IntegralNanosecondsUTC from ._api.combined import ingest_api, scout_run_api +from .ts import IntegralNanosecondsUTC def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp: diff --git a/nominal/nominal.py b/nominal/nominal.py index cc161195..12a3d6bf 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -7,12 +7,12 @@ from typing import TYPE_CHECKING, BinaryIO from nominal import _config -from .ts import IntegralNanosecondsUTC from . 
import ts from ._timeutils import _parse_timestamp from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video +from .ts import IntegralNanosecondsUTC if TYPE_CHECKING: import pandas as pd From 2499d0cde2acbffa477636e75b65d2a1e60f3bf4 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:17:33 -0400 Subject: [PATCH 20/51] deal w/ circular import --- nominal/_timeutils.py | 5 ++++- nominal/ts.py | 6 +----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index 0f8eb60f..d8d1934a 100644 --- a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -1,12 +1,15 @@ from __future__ import annotations from datetime import datetime, timezone +from typing import TypeAlias import dateutil.parser import numpy as np from ._api.combined import ingest_api, scout_run_api -from .ts import IntegralNanosecondsUTC + +# defined here rather than ts.py to avoid circular imports +IntegralNanosecondsUTC: TypeAlias = int def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp: diff --git a/nominal/ts.py b/nominal/ts.py index 233e21ae..88f1c5ff 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -8,9 +8,8 @@ from typing_extensions import TypeAlias -from nominal._timeutils import _flexible_to_iso8601 - from ._api.combined import ingest_api +from ._timeutils import IntegralNanosecondsUTC, _flexible_to_iso8601 __all__ = [ "Iso8601", @@ -29,9 +28,6 @@ ] -IntegralNanosecondsUTC = int - - @dataclass(frozen=True) class Iso8601: pass From 1369fe1b9d91b469f290d1367fee2aa4e254444d Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:28:46 -0400 Subject: [PATCH 21/51] created an abc for time domains --- nominal/core.py | 6 ++--- nominal/ts.py | 62 +++++++++++++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/nominal/core.py 
b/nominal/core.py index 3f08e6d7..de1351b0 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -34,7 +34,7 @@ ) from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import IntegralNanosecondsUTC, TypedTimeDomain, _time_domain_to_conjure_ingest_api +from .ts import IntegralNanosecondsUTC, TypedTimeDomain __all__ = [ "NominalClient", @@ -280,7 +280,7 @@ def add_to_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=_time_domain_to_conjure_ingest_api(timestamp_type), + timestamp_type=timestamp_type._to_conjure_ingest_api(), ), ), ) @@ -651,7 +651,7 @@ def create_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=_time_domain_to_conjure_ingest_api(timestamp_type), + timestamp_type=timestamp_type._to_conjure_ingest_api(), ), ), ) diff --git a/nominal/ts.py b/nominal/ts.py index 88f1c5ff..f3374cdb 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -1,5 +1,6 @@ from __future__ import annotations +import abc import warnings from dataclasses import dataclass from datetime import datetime @@ -28,28 +29,56 @@ ] +class _ConjureTimestampDomain(abc.ABC): + @abc.abstractmethod + def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: + pass + + @dataclass(frozen=True) -class Iso8601: - pass +class Iso8601(_ConjureTimestampDomain): + def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: + return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(iso8601=ingest_api.Iso8601Timestamp())) @dataclass(frozen=True) -class Epoch: +class Epoch(_ConjureTimestampDomain): unit: _LiteralTimeUnit + def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: + epoch = ingest_api.EpochTimestamp(time_unit=_time_unit_to_conjure(self.unit)) + 
return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(epoch_of_time_unit=epoch)) + @dataclass(frozen=True) -class Relative: +class Relative(_ConjureTimestampDomain): unit: _LiteralTimeUnit start: datetime | IntegralNanosecondsUTC """The starting time to which all relatives times are relative to.""" + def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: + """ + Note: The offset is a conjure datetime. They are serialized as ISO-8601 strings, with up-to nanosecond precision. + The Python type for the field is just a str. + Ref: + - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types + - https://github.com/palantir/conjure/pull/1643 + """ + relative = ingest_api.RelativeTimestamp( + time_unit=_time_unit_to_conjure(self.unit), offset=_flexible_to_iso8601(self.start) + ) + return ingest_api.TimestampType(relative=relative) + @dataclass(frozen=True) -class Custom: +class Custom(_ConjureTimestampDomain): format: str default_year: int | None = None + def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: + fmt = ingest_api.CustomTimestamp(format=self.format, default_year=self.default_year) + return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(custom_format=fmt)) + ISO_8601 = Iso8601() EPOCH_NANOSECONDS = Epoch("nanoseconds") @@ -108,29 +137,6 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: return _str_to_type[domain] -def _time_domain_to_conjure_ingest_api(domain: TypedTimeDomain) -> ingest_api.TimestampType: - """ - Note: datetimes are serialized a ISO-8601 strings, with up-to nanosecond precision. 
Ref: - - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types - - https://github.com/palantir/conjure/pull/1643 - """ - - if isinstance(domain, Iso8601): - return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(iso8601=ingest_api.Iso8601Timestamp())) - if isinstance(domain, Epoch): - epoch = ingest_api.EpochTimestamp(time_unit=_time_unit_to_conjure(domain.unit)) - return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(epoch_of_time_unit=epoch)) - if isinstance(domain, Custom): - fmt = ingest_api.CustomTimestamp(format=domain.format, default_year=domain.default_year) - return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(custom_format=fmt)) - if isinstance(domain, Relative): - relative = ingest_api.RelativeTimestamp( - time_unit=_time_unit_to_conjure(domain.unit), offset=_flexible_to_iso8601(domain.start) - ) - return ingest_api.TimestampType(relative=relative) - raise TypeError(f"invalid time domain type: {type(domain)}") - - def _time_unit_to_conjure(unit: _LiteralTimeUnit) -> ingest_api.TimeUnit: return ingest_api.TimeUnit[unit.upper()] From 1eaf9ddc39d34dfc4e5891a93b3a26832baa7fd2 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:30:26 -0400 Subject: [PATCH 22/51] add ts to toplevel and reorder all --- nominal/__init__.py | 23 ++++++++++++----------- nominal/core.py | 1 - 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/nominal/__init__.py b/nominal/__init__.py index 87a0888b..4375e82d 100644 --- a/nominal/__init__.py +++ b/nominal/__init__.py @@ -19,25 +19,26 @@ ) __all__ = [ - "set_base_url", - "get_default_client", - "upload_pandas", - "upload_polars", - "upload_csv", - "get_dataset", + "ts", "create_run", "create_run_csv", + "download_attachment", + "get_attachment", + "get_dataset", + "get_default_client", "get_run", + "get_video", "search_runs", + "set_base_url", "upload_attachment", - "get_attachment", - "download_attachment", + "upload_csv", 
+ "upload_pandas", + "upload_polars", "upload_video", - "get_video", # classes: when adding a new class, also add a filter to "hide" it in docs/reference/toplevel.md - "Dataset", - "Run", "Attachment", + "Dataset", "NominalClient", + "Run", "Video", ] diff --git a/nominal/core.py b/nominal/core.py index de1351b0..27ab22c3 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -42,7 +42,6 @@ "Dataset", "Attachment", "Video", - "IntegralNanosecondsUTC", ] From 14ac3e0357cd46d019471547f6ab0e3b93978e39 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:34:05 -0400 Subject: [PATCH 23/51] rename for similarity --- nominal/_timeutils.py | 2 +- nominal/ts.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index d8d1934a..a9239293 100644 --- a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -58,7 +58,7 @@ def _parse_timestamp(ts: str | datetime | IntegralNanosecondsUTC) -> IntegralNan return _datetime_to_integral_nanoseconds(ts) -def _flexible_to_iso8601(ts: datetime | IntegralNanosecondsUTC) -> str: +def _flexible_time_to_iso8601(ts: datetime | IntegralNanosecondsUTC) -> str: """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" if isinstance(ts, datetime): return ts.astimezone(tz=timezone.utc).isoformat() diff --git a/nominal/ts.py b/nominal/ts.py index f3374cdb..8d35790d 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -10,7 +10,7 @@ from typing_extensions import TypeAlias from ._api.combined import ingest_api -from ._timeutils import IntegralNanosecondsUTC, _flexible_to_iso8601 +from ._timeutils import IntegralNanosecondsUTC, _flexible_time_to_iso8601 __all__ = [ "Iso8601", @@ -65,7 +65,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: - https://github.com/palantir/conjure/pull/1643 """ relative = ingest_api.RelativeTimestamp( - time_unit=_time_unit_to_conjure(self.unit), 
offset=_flexible_to_iso8601(self.start) + time_unit=_time_unit_to_conjure(self.unit), offset=_flexible_time_to_iso8601(self.start) ) return ingest_api.TimestampType(relative=relative) From 3c38e28e0b5d28888da0e05d4e28161bf14a5acf Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:52:38 -0400 Subject: [PATCH 24/51] all timestamp conversions through an intermediary type --- nominal/_timeutils.py | 81 ++++++++++++++++---------------------- nominal/cli/run.py | 6 +-- nominal/core.py | 20 ++++------ nominal/nominal.py | 14 ++++--- nominal/ts.py | 4 +- tests/e2e/test_toplevel.py | 4 +- 6 files changed, 57 insertions(+), 72 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index a9239293..abba410f 100644 --- a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -1,10 +1,11 @@ from __future__ import annotations from datetime import datetime, timezone -from typing import TypeAlias +from typing import NamedTuple, TypeAlias import dateutil.parser import numpy as np +from typing_extensions import Self from ._api.combined import ingest_api, scout_run_api @@ -12,57 +13,43 @@ IntegralNanosecondsUTC: TypeAlias = int -def _flexible_time_to_conjure_scout_run_api(timestamp: datetime | IntegralNanosecondsUTC) -> scout_run_api.UtcTimestamp: - seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) - return scout_run_api.UtcTimestamp(seconds_since_epoch=seconds, offset_nanoseconds=nanos) +class SecondsNanos(NamedTuple): + seconds: int + nanos: int + def to_scout_run_api(self) -> scout_run_api.UtcTimestamp: + return scout_run_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) -def _flexible_time_to_conjure_ingest_api( - timestamp: datetime | IntegralNanosecondsUTC, -) -> ingest_api.UtcTimestamp: - seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) - return ingest_api.UtcTimestamp(seconds_since_epoch=seconds, offset_nanoseconds=nanos) + def to_ingest_api(self) -> ingest_api.UtcTimestamp: + return 
ingest_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) + def to_iso8601(self) -> str: + """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" + return str(np.datetime64(self.to_integral_nanoseconds(), "ns")) + "Z" -def _flexible_time_to_seconds_nanos( - timestamp: datetime | IntegralNanosecondsUTC, -) -> tuple[int, int]: - if isinstance(timestamp, datetime): - return _datetime_to_seconds_nanos(timestamp) - elif isinstance(timestamp, IntegralNanosecondsUTC): - return divmod(timestamp, 1_000_000_000) - raise TypeError(f"expected {datetime} or {IntegralNanosecondsUTC}, got {type(timestamp)}") + def to_integral_nanoseconds(self) -> IntegralNanosecondsUTC: + return self.seconds * 1_000_000_000 + self.nanos + @classmethod + def from_scout_run_api(cls, ts: scout_run_api.UtcTimestamp) -> Self: + return cls(seconds=ts.seconds_since_epoch, nanos=ts.offset_nanoseconds or 0) -def _conjure_time_to_integral_nanoseconds(ts: scout_run_api.UtcTimestamp) -> IntegralNanosecondsUTC: - return ts.seconds_since_epoch * 1_000_000_000 + (ts.offset_nanoseconds or 0) + @classmethod + def from_datetime(cls, dt: datetime) -> Self: + dt = dt.astimezone(timezone.utc) + seconds = int(dt.timestamp()) + nanos = dt.microsecond * 1000 + return cls(seconds, nanos) + @classmethod + def from_integral_nanoseconds(cls, ts: IntegralNanosecondsUTC) -> Self: + seconds, nanos = divmod(ts, 1_000_000_000) + return cls(seconds, nanos) -def _datetime_to_seconds_nanos(dt: datetime) -> tuple[int, int]: - dt = dt.astimezone(timezone.utc) - seconds = int(dt.timestamp()) - nanos = dt.microsecond * 1000 - return seconds, nanos - - -def _datetime_to_integral_nanoseconds(dt: datetime) -> IntegralNanosecondsUTC: - seconds, nanos = _datetime_to_seconds_nanos(dt) - return seconds * 1_000_000_000 + nanos - - -def _parse_timestamp(ts: str | datetime | IntegralNanosecondsUTC) -> IntegralNanosecondsUTC: - if isinstance(ts, int): 
- return ts - if isinstance(ts, str): - ts = dateutil.parser.parse(ts) - return _datetime_to_integral_nanoseconds(ts) - - -def _flexible_time_to_iso8601(ts: datetime | IntegralNanosecondsUTC) -> str: - """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" - if isinstance(ts, datetime): - return ts.astimezone(tz=timezone.utc).isoformat() - if isinstance(ts, int): - # np.datetime64[ns] assumes UTC - return str(np.datetime64(ts, "ns")) + "Z" - raise TypeError(f"timestamp {ts} must be a datetime or an integer") + @classmethod + def from_flexible(cls, ts: str | datetime | IntegralNanosecondsUTC) -> Self: + if isinstance(ts, int): + return cls.from_integral_nanoseconds(ts) + if isinstance(ts, str): + ts = dateutil.parser.parse(ts) + return cls.from_datetime(ts) diff --git a/nominal/cli/run.py b/nominal/cli/run.py index a1c85075..4185a220 100644 --- a/nominal/cli/run.py +++ b/nominal/cli/run.py @@ -4,7 +4,7 @@ import click -from .._timeutils import _parse_timestamp +from .._timeutils import SecondsNanos from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client @@ -36,8 +36,8 @@ def create( client = get_client(base_url, token) run = client.create_run( name, - _parse_timestamp(start), - _parse_timestamp(end), + SecondsNanos.from_flexible(start).to_integral_nanoseconds(), + SecondsNanos.from_flexible(end).to_integral_nanoseconds(), desc, properties=dict(properties), labels=labels, diff --git a/nominal/core.py b/nominal/core.py index 27ab22c3..b648da64 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -27,11 +27,7 @@ upload_api, ) from ._multipart import put_multipart_upload -from ._timeutils import ( - _conjure_time_to_integral_nanoseconds, - _flexible_time_to_conjure_ingest_api, - _flexible_time_to_conjure_scout_run_api, -) +from ._timeutils import SecondsNanos from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, 
NominalIngestFailed from .ts import IntegralNanosecondsUTC, TypedTimeDomain @@ -164,8 +160,8 @@ def _from_conjure(cls, nominal_client: NominalClient, run: scout_run_api.Run) -> description=run.description, properties=MappingProxyType(run.properties), labels=tuple(run.labels), - start=_conjure_time_to_integral_nanoseconds(run.start_time), - end=(_conjure_time_to_integral_nanoseconds(run.end_time) if run.end_time else None), + start=SecondsNanos.from_scout_run_api(run.start_time).to_integral_nanoseconds(), + end=(SecondsNanos.from_scout_run_api(run.end_time).to_integral_nanoseconds() if run.end_time else None), _client=nominal_client, ) @@ -513,9 +509,9 @@ def create_run( labels=list(labels), links=[], properties={} if properties is None else dict(properties), - start_time=_flexible_time_to_conjure_scout_run_api(start), + start_time=SecondsNanos.from_flexible(start).to_scout_run_api(), title=name, - end_time=_flexible_time_to_conjure_scout_run_api(end), + end_time=SecondsNanos.from_flexible(end).to_scout_run_api(), ) response = self._run_client.create_run(self._auth_header, request) return Run._from_conjure(self, response) @@ -686,7 +682,7 @@ def create_video_from_io( sources=[ingest_api.IngestSource(s3=ingest_api.S3IngestSource(path=s3_path))], timestamps=ingest_api.VideoTimestampManifest( no_manifest=ingest_api.NoTimestampManifest( - starting_timestamp=_flexible_time_to_conjure_ingest_api(start) + starting_timestamp=SecondsNanos.from_flexible(start).to_ingest_api() ) ), description=description, @@ -825,10 +821,10 @@ def _create_search_runs_query( ) -> scout_run_api.SearchQuery: queries = [] if start is not None: - q = scout_run_api.SearchQuery(start_time_inclusive=_flexible_time_to_conjure_scout_run_api(start)) + q = scout_run_api.SearchQuery(start_time_inclusive=SecondsNanos.from_flexible(start).to_scout_run_api()) queries.append(q) if end is not None: - q = scout_run_api.SearchQuery(end_time_inclusive=_flexible_time_to_conjure_scout_run_api(end)) + q = 
scout_run_api.SearchQuery(end_time_inclusive=SecondsNanos.from_flexible(end).to_scout_run_api()) queries.append(q) if exact_name is not None: q = scout_run_api.SearchQuery(exact_match=exact_name) diff --git a/nominal/nominal.py b/nominal/nominal.py index 12a3d6bf..ccccdcb1 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -9,7 +9,7 @@ from nominal import _config from . import ts -from ._timeutils import _parse_timestamp +from ._timeutils import SecondsNanos from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video from .ts import IntegralNanosecondsUTC @@ -182,8 +182,8 @@ def create_run( conn = get_default_client() return conn.create_run( name, - start=_parse_timestamp(start), - end=_parse_timestamp(end), + start=SecondsNanos.from_flexible(start).to_integral_nanoseconds(), + end=SecondsNanos.from_flexible(end).to_integral_nanoseconds(), description=description, ) @@ -243,8 +243,8 @@ def search_runs( raise ValueError("must provide one of: start, end, exact_name, label, or property") conn = get_default_client() runs = conn.search_runs( - start=None if start is None else _parse_timestamp(start), - end=None if end is None else _parse_timestamp(end), + start=None if start is None else SecondsNanos.from_flexible(start).to_integral_nanoseconds(), + end=None if end is None else SecondsNanos.from_flexible(end).to_integral_nanoseconds(), exact_name=exact_name, label=label, property=property, @@ -286,7 +286,9 @@ def upload_video( path = Path(file) file_type = FileType.from_path(path) with open(file, "rb") as f: - return conn.create_video_from_io(f, name, _parse_timestamp(start), description, file_type) + return conn.create_video_from_io( + f, name, SecondsNanos.from_flexible(start).to_integral_nanoseconds(), description, file_type + ) def get_video(rid: str) -> Video: diff --git a/nominal/ts.py b/nominal/ts.py index 8d35790d..f31b3340 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -10,7 +10,7 @@ from 
typing_extensions import TypeAlias from ._api.combined import ingest_api -from ._timeutils import IntegralNanosecondsUTC, _flexible_time_to_iso8601 +from ._timeutils import IntegralNanosecondsUTC, SecondsNanos __all__ = [ "Iso8601", @@ -65,7 +65,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: - https://github.com/palantir/conjure/pull/1643 """ relative = ingest_api.RelativeTimestamp( - time_unit=_time_unit_to_conjure(self.unit), offset=_flexible_time_to_iso8601(self.start) + time_unit=_time_unit_to_conjure(self.unit), offset=SecondsNanos.from_flexible(self.start).to_iso8601() ) return ingest_api.TimestampType(relative=relative) diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index 838b1ca4..ead2f9f5 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -176,8 +176,8 @@ def test_search_runs(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == _timeutils._parse_timestamp(start) - assert run2.end == run.end == _timeutils._parse_timestamp(end) + assert run2.start == run.start == _timeutils.SecondsNanos(start).to_integral_nanoseconds() + assert run2.end == run.end == _timeutils.SecondsNanos(end).to_integral_nanoseconds() assert run2.properties == run.properties == {} assert run2.labels == run.labels == () From 6ec45867d66e3e2992247e3af12346c929a49588 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:56:24 -0400 Subject: [PATCH 25/51] move SecondsNanos to new ts file, make internal --- nominal/_timeutils.py | 51 +---------------------------------- nominal/cli/run.py | 6 ++--- nominal/core.py | 17 ++++++------ nominal/nominal.py | 13 +++++---- nominal/ts.py | 54 ++++++++++++++++++++++++++++++++++---- tests/e2e/test_toplevel.py | 5 ++-- 6 files changed, 70 insertions(+), 76 deletions(-) diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py index abba410f..960c73cf 100644 --- 
a/nominal/_timeutils.py +++ b/nominal/_timeutils.py @@ -1,55 +1,6 @@ from __future__ import annotations -from datetime import datetime, timezone -from typing import NamedTuple, TypeAlias - -import dateutil.parser -import numpy as np -from typing_extensions import Self - -from ._api.combined import ingest_api, scout_run_api +from typing import TypeAlias # defined here rather than ts.py to avoid circular imports IntegralNanosecondsUTC: TypeAlias = int - - -class SecondsNanos(NamedTuple): - seconds: int - nanos: int - - def to_scout_run_api(self) -> scout_run_api.UtcTimestamp: - return scout_run_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) - - def to_ingest_api(self) -> ingest_api.UtcTimestamp: - return ingest_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) - - def to_iso8601(self) -> str: - """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" - return str(np.datetime64(self.to_integral_nanoseconds(), "ns")) + "Z" - - def to_integral_nanoseconds(self) -> IntegralNanosecondsUTC: - return self.seconds * 1_000_000_000 + self.nanos - - @classmethod - def from_scout_run_api(cls, ts: scout_run_api.UtcTimestamp) -> Self: - return cls(seconds=ts.seconds_since_epoch, nanos=ts.offset_nanoseconds or 0) - - @classmethod - def from_datetime(cls, dt: datetime) -> Self: - dt = dt.astimezone(timezone.utc) - seconds = int(dt.timestamp()) - nanos = dt.microsecond * 1000 - return cls(seconds, nanos) - - @classmethod - def from_integral_nanoseconds(cls, ts: IntegralNanosecondsUTC) -> Self: - seconds, nanos = divmod(ts, 1_000_000_000) - return cls(seconds, nanos) - - @classmethod - def from_flexible(cls, ts: str | datetime | IntegralNanosecondsUTC) -> Self: - if isinstance(ts, int): - return cls.from_integral_nanoseconds(ts) - if isinstance(ts, str): - ts = dateutil.parser.parse(ts) - return cls.from_datetime(ts) diff --git a/nominal/cli/run.py 
b/nominal/cli/run.py index 4185a220..f5e65c85 100644 --- a/nominal/cli/run.py +++ b/nominal/cli/run.py @@ -4,7 +4,7 @@ import click -from .._timeutils import SecondsNanos +from ..ts import _SecondsNanos from ._utils import BASE_URL_OPTION, TOKEN_OPTION, get_client @@ -36,8 +36,8 @@ def create( client = get_client(base_url, token) run = client.create_run( name, - SecondsNanos.from_flexible(start).to_integral_nanoseconds(), - SecondsNanos.from_flexible(end).to_integral_nanoseconds(), + _SecondsNanos.from_flexible(start).to_integral_nanoseconds(), + _SecondsNanos.from_flexible(end).to_integral_nanoseconds(), desc, properties=dict(properties), labels=labels, diff --git a/nominal/core.py b/nominal/core.py index b648da64..6e7d7727 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -27,10 +27,9 @@ upload_api, ) from ._multipart import put_multipart_upload -from ._timeutils import SecondsNanos from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import IntegralNanosecondsUTC, TypedTimeDomain +from .ts import IntegralNanosecondsUTC, _SecondsNanos, TypedTimeDomain __all__ = [ "NominalClient", @@ -160,8 +159,8 @@ def _from_conjure(cls, nominal_client: NominalClient, run: scout_run_api.Run) -> description=run.description, properties=MappingProxyType(run.properties), labels=tuple(run.labels), - start=SecondsNanos.from_scout_run_api(run.start_time).to_integral_nanoseconds(), - end=(SecondsNanos.from_scout_run_api(run.end_time).to_integral_nanoseconds() if run.end_time else None), + start=_SecondsNanos.from_scout_run_api(run.start_time).to_integral_nanoseconds(), + end=(_SecondsNanos.from_scout_run_api(run.end_time).to_integral_nanoseconds() if run.end_time else None), _client=nominal_client, ) @@ -509,9 +508,9 @@ def create_run( labels=list(labels), links=[], properties={} if properties is None else dict(properties), - 
start_time=SecondsNanos.from_flexible(start).to_scout_run_api(), + start_time=_SecondsNanos.from_flexible(start).to_scout_run_api(), title=name, - end_time=SecondsNanos.from_flexible(end).to_scout_run_api(), + end_time=_SecondsNanos.from_flexible(end).to_scout_run_api(), ) response = self._run_client.create_run(self._auth_header, request) return Run._from_conjure(self, response) @@ -682,7 +681,7 @@ def create_video_from_io( sources=[ingest_api.IngestSource(s3=ingest_api.S3IngestSource(path=s3_path))], timestamps=ingest_api.VideoTimestampManifest( no_manifest=ingest_api.NoTimestampManifest( - starting_timestamp=SecondsNanos.from_flexible(start).to_ingest_api() + starting_timestamp=_SecondsNanos.from_flexible(start).to_ingest_api() ) ), description=description, @@ -821,10 +820,10 @@ def _create_search_runs_query( ) -> scout_run_api.SearchQuery: queries = [] if start is not None: - q = scout_run_api.SearchQuery(start_time_inclusive=SecondsNanos.from_flexible(start).to_scout_run_api()) + q = scout_run_api.SearchQuery(start_time_inclusive=_SecondsNanos.from_flexible(start).to_scout_run_api()) queries.append(q) if end is not None: - q = scout_run_api.SearchQuery(end_time_inclusive=SecondsNanos.from_flexible(end).to_scout_run_api()) + q = scout_run_api.SearchQuery(end_time_inclusive=_SecondsNanos.from_flexible(end).to_scout_run_api()) queries.append(q) if exact_name is not None: q = scout_run_api.SearchQuery(exact_match=exact_name) diff --git a/nominal/nominal.py b/nominal/nominal.py index ccccdcb1..d37afff7 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -9,10 +9,9 @@ from nominal import _config from . 
import ts -from ._timeutils import SecondsNanos from ._utils import FileType, FileTypes, reader_writer from .core import Attachment, Dataset, NominalClient, Run, Video -from .ts import IntegralNanosecondsUTC +from .ts import IntegralNanosecondsUTC, _SecondsNanos if TYPE_CHECKING: import pandas as pd @@ -182,8 +181,8 @@ def create_run( conn = get_default_client() return conn.create_run( name, - start=SecondsNanos.from_flexible(start).to_integral_nanoseconds(), - end=SecondsNanos.from_flexible(end).to_integral_nanoseconds(), + start=_SecondsNanos.from_flexible(start).to_integral_nanoseconds(), + end=_SecondsNanos.from_flexible(end).to_integral_nanoseconds(), description=description, ) @@ -243,8 +242,8 @@ def search_runs( raise ValueError("must provide one of: start, end, exact_name, label, or property") conn = get_default_client() runs = conn.search_runs( - start=None if start is None else SecondsNanos.from_flexible(start).to_integral_nanoseconds(), - end=None if end is None else SecondsNanos.from_flexible(end).to_integral_nanoseconds(), + start=None if start is None else _SecondsNanos.from_flexible(start).to_integral_nanoseconds(), + end=None if end is None else _SecondsNanos.from_flexible(end).to_integral_nanoseconds(), exact_name=exact_name, label=label, property=property, @@ -287,7 +286,7 @@ def upload_video( file_type = FileType.from_path(path) with open(file, "rb") as f: return conn.create_video_from_io( - f, name, SecondsNanos.from_flexible(start).to_integral_nanoseconds(), description, file_type + f, name, _SecondsNanos.from_flexible(start).to_integral_nanoseconds(), description, file_type ) diff --git a/nominal/ts.py b/nominal/ts.py index f31b3340..2d0d0830 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -3,14 +3,16 @@ import abc import warnings from dataclasses import dataclass -from datetime import datetime +from datetime import datetime, timezone from types import MappingProxyType -from typing import Literal, Mapping, Union +from typing import Literal, 
Mapping, NamedTuple, Self, Union +import dateutil.parser +import numpy as np from typing_extensions import TypeAlias -from ._api.combined import ingest_api -from ._timeutils import IntegralNanosecondsUTC, SecondsNanos +from ._api.combined import ingest_api, scout_run_api +from ._timeutils import IntegralNanosecondsUTC __all__ = [ "Iso8601", @@ -65,7 +67,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: - https://github.com/palantir/conjure/pull/1643 """ relative = ingest_api.RelativeTimestamp( - time_unit=_time_unit_to_conjure(self.unit), offset=SecondsNanos.from_flexible(self.start).to_iso8601() + time_unit=_time_unit_to_conjure(self.unit), offset=_SecondsNanos.from_flexible(self.start).to_iso8601() ) return ingest_api.TimestampType(relative=relative) @@ -158,3 +160,45 @@ def _time_unit_to_conjure(unit: _LiteralTimeUnit) -> ingest_api.TimeUnit: "relative_hours": Relative("hours", start=0), } ) + + +class _SecondsNanos(NamedTuple): + seconds: int + nanos: int + + def to_scout_run_api(self) -> scout_run_api.UtcTimestamp: + return scout_run_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) + + def to_ingest_api(self) -> ingest_api.UtcTimestamp: + return ingest_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) + + def to_iso8601(self) -> str: + """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" + return str(np.datetime64(self.to_integral_nanoseconds(), "ns")) + "Z" + + def to_integral_nanoseconds(self) -> IntegralNanosecondsUTC: + return self.seconds * 1_000_000_000 + self.nanos + + @classmethod + def from_scout_run_api(cls, ts: scout_run_api.UtcTimestamp) -> Self: + return cls(seconds=ts.seconds_since_epoch, nanos=ts.offset_nanoseconds or 0) + + @classmethod + def from_datetime(cls, dt: datetime) -> Self: + dt = dt.astimezone(timezone.utc) + seconds = int(dt.timestamp()) + nanos = dt.microsecond * 1000 + return 
cls(seconds, nanos) + + @classmethod + def from_integral_nanoseconds(cls, ts: IntegralNanosecondsUTC) -> Self: + seconds, nanos = divmod(ts, 1_000_000_000) + return cls(seconds, nanos) + + @classmethod + def from_flexible(cls, ts: str | datetime | IntegralNanosecondsUTC) -> Self: + if isinstance(ts, int): + return cls.from_integral_nanoseconds(ts) + if isinstance(ts, str): + ts = dateutil.parser.parse(ts) + return cls.from_datetime(ts) diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index ead2f9f5..6129cc0d 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -7,6 +7,7 @@ import polars as pl import nominal as nm +import nominal.ts from nominal import _timeutils, _utils from . import _create_random_start_end @@ -176,8 +177,8 @@ def test_search_runs(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == _timeutils.SecondsNanos(start).to_integral_nanoseconds() - assert run2.end == run.end == _timeutils.SecondsNanos(end).to_integral_nanoseconds() + assert run2.start == run.start == nominal.ts._SecondsNanos(start).to_integral_nanoseconds() + assert run2.end == run.end == nominal.ts._SecondsNanos(end).to_integral_nanoseconds() assert run2.properties == run.properties == {} assert run2.labels == run.labels == () From 8507888f267460f7ecf17dd57bd47613b205602c Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 13:58:09 -0400 Subject: [PATCH 26/51] remove timeutils --- nominal/_timeutils.py | 6 ------ nominal/core.py | 2 +- nominal/ts.py | 3 ++- 3 files changed, 3 insertions(+), 8 deletions(-) delete mode 100644 nominal/_timeutils.py diff --git a/nominal/_timeutils.py b/nominal/_timeutils.py deleted file mode 100644 index 960c73cf..00000000 --- a/nominal/_timeutils.py +++ /dev/null @@ -1,6 +0,0 @@ -from __future__ import annotations - -from typing import TypeAlias - -# defined here rather than ts.py to avoid 
circular imports -IntegralNanosecondsUTC: TypeAlias = int diff --git a/nominal/core.py b/nominal/core.py index 6e7d7727..c14c2cf3 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -29,7 +29,7 @@ from ._multipart import put_multipart_upload from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import IntegralNanosecondsUTC, _SecondsNanos, TypedTimeDomain +from .ts import IntegralNanosecondsUTC, TypedTimeDomain, _SecondsNanos __all__ = [ "NominalClient", diff --git a/nominal/ts.py b/nominal/ts.py index 2d0d0830..72bd3a40 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -12,7 +12,6 @@ from typing_extensions import TypeAlias from ._api.combined import ingest_api, scout_run_api -from ._timeutils import IntegralNanosecondsUTC __all__ = [ "Iso8601", @@ -30,6 +29,8 @@ "IntegralNanosecondsUTC", ] +IntegralNanosecondsUTC: TypeAlias = int + class _ConjureTimestampDomain(abc.ABC): @abc.abstractmethod From c1a92a63cafc6e0667b97d0b86f1e2a13604c254 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 14:01:52 -0400 Subject: [PATCH 27/51] update e2e tests --- tests/e2e/test_toplevel.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index 6129cc0d..507a54eb 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -7,8 +7,7 @@ import polars as pl import nominal as nm -import nominal.ts -from nominal import _timeutils, _utils +from nominal import _utils from . 
import _create_random_start_end @@ -114,8 +113,8 @@ def test_create_run(): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == _timeutils._datetime_to_integral_nanoseconds(start) - assert run.end == _timeutils._datetime_to_integral_nanoseconds(end) + assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_integral_nanoseconds() + assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_integral_nanoseconds() assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -132,8 +131,8 @@ def test_create_run_csv(csv_data): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == _timeutils._datetime_to_integral_nanoseconds(start) - assert run.end == _timeutils._datetime_to_integral_nanoseconds(end) + assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_integral_nanoseconds() + assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_integral_nanoseconds() assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -158,8 +157,8 @@ def test_get_run(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == _timeutils._parse_timestamp(start) - assert run2.end == run.end == _timeutils._parse_timestamp(end) + assert run2.start == run.start == nm.ts._SecondsNanos.from_flexible(start).to_integral_nanoseconds() + assert run2.end == run.end == nm.ts._SecondsNanos.from_flexible(end).to_integral_nanoseconds() assert run2.properties == run.properties == {} assert run2.labels == run.labels == () @@ -177,8 +176,8 @@ def test_search_runs(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == nominal.ts._SecondsNanos(start).to_integral_nanoseconds() - assert run2.end == run.end == nominal.ts._SecondsNanos(end).to_integral_nanoseconds() + assert run2.start == 
run.start == nm.ts._SecondsNanos.from_datetime(start).to_integral_nanoseconds() + assert run2.end == run.end == nm.ts._SecondsNanos.from_datetime(end).to_integral_nanoseconds() assert run2.properties == run.properties == {} assert run2.labels == run.labels == () From 82ab2919e39105fae5fe3e86ed6ad7887df460cb Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 14:07:38 -0400 Subject: [PATCH 28/51] unify on time_domain name --- nominal/nominal.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nominal/nominal.py b/nominal/nominal.py index d37afff7..0bac2d9e 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -205,13 +205,13 @@ def create_run_csv( The run start and end times are created from the minimum and maximum timestamps in the CSV file in the timestamp column. """ - typed_timestamp_type = ts._make_typed_time_domain(timestamp_type) - if not isinstance(typed_timestamp_type, (ts.Iso8601, ts.Epoch)): + time_domain = ts._make_typed_time_domain(timestamp_type) + if not isinstance(time_domain, (ts.Iso8601, ts.Epoch)): raise ValueError( "`create_run_csv()` only supports iso8601 or epoch timestamps: use `upload_dataset()` and `create_run()` instead" ) - start, end = _get_start_end_timestamp_csv_file(file, timestamp_column, typed_timestamp_type) - dataset = upload_csv(file, f"Dataset for Run: {name}", timestamp_column, typed_timestamp_type) + start, end = _get_start_end_timestamp_csv_file(file, timestamp_column, time_domain) + dataset = upload_csv(file, f"Dataset for Run: {name}", timestamp_column, time_domain) run = create_run(name, start=start, end=end, description=description) run.add_dataset("dataset", dataset) return run From 2c8df1ec883a8a55ab00af519c934e23da0a9c96 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 14:22:23 -0400 Subject: [PATCH 29/51] documentation once-over on ts.py --- nominal/ts.py | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 
deletions(-) diff --git a/nominal/ts.py b/nominal/ts.py index 72bd3a40..444157db 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -30,6 +30,7 @@ ] IntegralNanosecondsUTC: TypeAlias = int +"""A timestamp in nanoseconds since the Unix epoch, UTC.""" class _ConjureTimestampDomain(abc.ABC): @@ -40,12 +41,20 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @dataclass(frozen=True) class Iso8601(_ConjureTimestampDomain): + """ISO 8601 timestamp format, e.g. '2021-01-01T00:00:00Z' or '2021-01-01T00:00:00.123+00:00'. + The time zone must be specified. + """ + def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(iso8601=ingest_api.Iso8601Timestamp())) @dataclass(frozen=True) class Epoch(_ConjureTimestampDomain): + """An absolute timestamp in numeric format representing time since some epoch. + The timestamp can be integral or floating point, e.g. 1612137600.123 for 2021-02-01T00:00:00.123Z. + """ + unit: _LiteralTimeUnit def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @@ -55,13 +64,17 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @dataclass(frozen=True) class Relative(_ConjureTimestampDomain): + """A relative timestamp in numeric format representing time since some start time. + The relative timestamp can be integral or floating point, e.g. 12.123 for 12 seconds and 123 milliseconds after start. + The start time is absolute timestamp format representing time since some epoch. + """ + unit: _LiteralTimeUnit start: datetime | IntegralNanosecondsUTC """The starting time to which all relatives times are relative to.""" def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: - """ - Note: The offset is a conjure datetime. They are serialized as ISO-8601 strings, with up-to nanosecond precision. + """Note: The offset is a conjure datetime. They are serialized as ISO-8601 strings, with up-to nanosecond precision. 
The Python type for the field is just a str. Ref: - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types @@ -75,14 +88,22 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @dataclass(frozen=True) class Custom(_ConjureTimestampDomain): + """A custom timestamp format. The custom timestamps are expected to be absolute timestamps. + + The format string should be in the format of the `DateTimeFormatter` class in Java. + """ + format: str + """Must be in the format of the `DateTimeFormatter` class in Java.""" default_year: int | None = None + """Accepted as an optional field for cases like IRIG time format, where the year is not present.""" def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: fmt = ingest_api.CustomTimestamp(format=self.format, default_year=self.default_year) return ingest_api.TimestampType(absolute=ingest_api.AbsoluteTimestamp(custom_format=fmt)) +# constants for pedagogy, documentation, default arguments, etc. ISO_8601 = Iso8601() EPOCH_NANOSECONDS = Epoch("nanoseconds") EPOCH_MICROSECONDS = Epoch("microseconds") @@ -120,7 +141,10 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: ] TypedTimeDomain: TypeAlias = Union[Iso8601, Epoch, Relative, Custom] +"""Strongly typed time domain types.""" + _AnyTimeDomain: TypeAlias = Union[TypedTimeDomain, _LiteralAbsolute, _LiteralRelativeDeprecated] +"""All allowable time domain types, including string representations.""" def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: @@ -164,6 +188,11 @@ def _time_unit_to_conjure(unit: _LiteralTimeUnit) -> ingest_api.TimeUnit: class _SecondsNanos(NamedTuple): + """A simple internal timestamp representation that can be converted to/from various formats. + + These represent nanosecond-precision epoch timestamps. 
+ """ + seconds: int nanos: int @@ -174,7 +203,11 @@ def to_ingest_api(self) -> ingest_api.UtcTimestamp: return ingest_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) def to_iso8601(self) -> str: - """datetime.datetime objects are only microsecond-precise, so we use numpy's datetime64[ns] for nanosecond precision.""" + """datetime.datetime is only microsecond-precise, so we use np.datetime64[ns] to get nanosecond-precision for printing. + Note that nanosecond precision is the maximum allowable for conjure datetime fields. + - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types + - https://github.com/palantir/conjure/pull/1643 + """ return str(np.datetime64(self.to_integral_nanoseconds(), "ns")) + "Z" def to_integral_nanoseconds(self) -> IntegralNanosecondsUTC: From fd1a7219c4e5089f17263b5a6a281a699d3f7632 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 15:24:23 -0400 Subject: [PATCH 30/51] correct comment about offset --- nominal/ts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nominal/ts.py b/nominal/ts.py index 444157db..4384e93d 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -153,7 +153,7 @@ def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: if not isinstance(domain, str): raise TypeError(f"timestamp type {domain} must be a string or an instance of one of: {TypedTimeDomain}") if domain.startswith("relative_"): - # until this is completely removed, we implicitly assume offset=None in the APIs + # until this is completely removed, we implicitly assume offset=1970-01-01 in the APIs warnings.warn( "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.timedomain.Relative` instead. " "for example: instead of 'relative_seconds', use `nm.timedomain.Relative('seconds', start=datetime.now())`. 
", From a12b0f44aa8674821e69bbd07948f8d65e509ab3 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 15:25:19 -0400 Subject: [PATCH 31/51] .to_integral_nanoseconds() -> .to_ns() --- nominal/cli/run.py | 4 ++-- nominal/core.py | 4 ++-- nominal/nominal.py | 12 +++++------- nominal/ts.py | 4 ++-- tests/e2e/test_toplevel.py | 16 ++++++++-------- 5 files changed, 19 insertions(+), 21 deletions(-) diff --git a/nominal/cli/run.py b/nominal/cli/run.py index f5e65c85..c7b0378c 100644 --- a/nominal/cli/run.py +++ b/nominal/cli/run.py @@ -36,8 +36,8 @@ def create( client = get_client(base_url, token) run = client.create_run( name, - _SecondsNanos.from_flexible(start).to_integral_nanoseconds(), - _SecondsNanos.from_flexible(end).to_integral_nanoseconds(), + _SecondsNanos.from_flexible(start).to_ns(), + _SecondsNanos.from_flexible(end).to_ns(), desc, properties=dict(properties), labels=labels, diff --git a/nominal/core.py b/nominal/core.py index c14c2cf3..7cc0d420 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -159,8 +159,8 @@ def _from_conjure(cls, nominal_client: NominalClient, run: scout_run_api.Run) -> description=run.description, properties=MappingProxyType(run.properties), labels=tuple(run.labels), - start=_SecondsNanos.from_scout_run_api(run.start_time).to_integral_nanoseconds(), - end=(_SecondsNanos.from_scout_run_api(run.end_time).to_integral_nanoseconds() if run.end_time else None), + start=_SecondsNanos.from_scout_run_api(run.start_time).to_ns(), + end=(_SecondsNanos.from_scout_run_api(run.end_time).to_ns() if run.end_time else None), _client=nominal_client, ) diff --git a/nominal/nominal.py b/nominal/nominal.py index 0bac2d9e..8ef6ace5 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -181,8 +181,8 @@ def create_run( conn = get_default_client() return conn.create_run( name, - start=_SecondsNanos.from_flexible(start).to_integral_nanoseconds(), - end=_SecondsNanos.from_flexible(end).to_integral_nanoseconds(), + 
start=_SecondsNanos.from_flexible(start).to_ns(), + end=_SecondsNanos.from_flexible(end).to_ns(), description=description, ) @@ -242,8 +242,8 @@ def search_runs( raise ValueError("must provide one of: start, end, exact_name, label, or property") conn = get_default_client() runs = conn.search_runs( - start=None if start is None else _SecondsNanos.from_flexible(start).to_integral_nanoseconds(), - end=None if end is None else _SecondsNanos.from_flexible(end).to_integral_nanoseconds(), + start=None if start is None else _SecondsNanos.from_flexible(start).to_ns(), + end=None if end is None else _SecondsNanos.from_flexible(end).to_ns(), exact_name=exact_name, label=label, property=property, @@ -285,9 +285,7 @@ def upload_video( path = Path(file) file_type = FileType.from_path(path) with open(file, "rb") as f: - return conn.create_video_from_io( - f, name, _SecondsNanos.from_flexible(start).to_integral_nanoseconds(), description, file_type - ) + return conn.create_video_from_io(f, name, _SecondsNanos.from_flexible(start).to_ns(), description, file_type) def get_video(rid: str) -> Video: diff --git a/nominal/ts.py b/nominal/ts.py index 4384e93d..63f05480 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -208,9 +208,9 @@ def to_iso8601(self) -> str: - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types - https://github.com/palantir/conjure/pull/1643 """ - return str(np.datetime64(self.to_integral_nanoseconds(), "ns")) + "Z" + return str(np.datetime64(self.to_ns(), "ns")) + "Z" - def to_integral_nanoseconds(self) -> IntegralNanosecondsUTC: + def to_ns(self) -> IntegralNanosecondsUTC: return self.seconds * 1_000_000_000 + self.nanos @classmethod diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index 507a54eb..03c43974 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -113,8 +113,8 @@ def test_create_run(): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == 
nm.ts._SecondsNanos.from_datetime(start).to_integral_nanoseconds() - assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_integral_nanoseconds() + assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_ns() + assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_ns() assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -131,8 +131,8 @@ def test_create_run_csv(csv_data): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_integral_nanoseconds() - assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_integral_nanoseconds() + assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_ns() + assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_ns() assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -157,8 +157,8 @@ def test_get_run(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == nm.ts._SecondsNanos.from_flexible(start).to_integral_nanoseconds() - assert run2.end == run.end == nm.ts._SecondsNanos.from_flexible(end).to_integral_nanoseconds() + assert run2.start == run.start == nm.ts._SecondsNanos.from_flexible(start).to_ns() + assert run2.end == run.end == nm.ts._SecondsNanos.from_flexible(end).to_ns() assert run2.properties == run.properties == {} assert run2.labels == run.labels == () @@ -176,8 +176,8 @@ def test_search_runs(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == nm.ts._SecondsNanos.from_datetime(start).to_integral_nanoseconds() - assert run2.end == run.end == nm.ts._SecondsNanos.from_datetime(end).to_integral_nanoseconds() + assert run2.start == run.start == nm.ts._SecondsNanos.from_datetime(start).to_ns() + assert run2.end == run.end == nm.ts._SecondsNanos.from_datetime(end).to_ns() assert 
run2.properties == run.properties == {} assert run2.labels == run.labels == () From 30cb8e632bf1bced2a967600212a771ca91ac516 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 15:26:42 -0400 Subject: [PATCH 32/51] ns felt too short --- nominal/cli/run.py | 4 ++-- nominal/core.py | 4 ++-- nominal/nominal.py | 12 +++++++----- nominal/ts.py | 8 ++++---- tests/e2e/test_toplevel.py | 16 ++++++++-------- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/nominal/cli/run.py b/nominal/cli/run.py index c7b0378c..bde809bb 100644 --- a/nominal/cli/run.py +++ b/nominal/cli/run.py @@ -36,8 +36,8 @@ def create( client = get_client(base_url, token) run = client.create_run( name, - _SecondsNanos.from_flexible(start).to_ns(), - _SecondsNanos.from_flexible(end).to_ns(), + _SecondsNanos.from_flexible(start).to_nanoseconds(), + _SecondsNanos.from_flexible(end).to_nanoseconds(), desc, properties=dict(properties), labels=labels, diff --git a/nominal/core.py b/nominal/core.py index 7cc0d420..7771d4c6 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -159,8 +159,8 @@ def _from_conjure(cls, nominal_client: NominalClient, run: scout_run_api.Run) -> description=run.description, properties=MappingProxyType(run.properties), labels=tuple(run.labels), - start=_SecondsNanos.from_scout_run_api(run.start_time).to_ns(), - end=(_SecondsNanos.from_scout_run_api(run.end_time).to_ns() if run.end_time else None), + start=_SecondsNanos.from_scout_run_api(run.start_time).to_nanoseconds(), + end=(_SecondsNanos.from_scout_run_api(run.end_time).to_nanoseconds() if run.end_time else None), _client=nominal_client, ) diff --git a/nominal/nominal.py b/nominal/nominal.py index 8ef6ace5..b7bb0575 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -181,8 +181,8 @@ def create_run( conn = get_default_client() return conn.create_run( name, - start=_SecondsNanos.from_flexible(start).to_ns(), - end=_SecondsNanos.from_flexible(end).to_ns(), + 
start=_SecondsNanos.from_flexible(start).to_nanoseconds(), + end=_SecondsNanos.from_flexible(end).to_nanoseconds(), description=description, ) @@ -242,8 +242,8 @@ def search_runs( raise ValueError("must provide one of: start, end, exact_name, label, or property") conn = get_default_client() runs = conn.search_runs( - start=None if start is None else _SecondsNanos.from_flexible(start).to_ns(), - end=None if end is None else _SecondsNanos.from_flexible(end).to_ns(), + start=None if start is None else _SecondsNanos.from_flexible(start).to_nanoseconds(), + end=None if end is None else _SecondsNanos.from_flexible(end).to_nanoseconds(), exact_name=exact_name, label=label, property=property, @@ -285,7 +285,9 @@ def upload_video( path = Path(file) file_type = FileType.from_path(path) with open(file, "rb") as f: - return conn.create_video_from_io(f, name, _SecondsNanos.from_flexible(start).to_ns(), description, file_type) + return conn.create_video_from_io( + f, name, _SecondsNanos.from_flexible(start).to_nanoseconds(), description, file_type + ) def get_video(rid: str) -> Video: diff --git a/nominal/ts.py b/nominal/ts.py index 63f05480..71d998a2 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -208,9 +208,9 @@ def to_iso8601(self) -> str: - https://github.com/palantir/conjure/blob/master/docs/concepts.md#built-in-types - https://github.com/palantir/conjure/pull/1643 """ - return str(np.datetime64(self.to_ns(), "ns")) + "Z" + return str(np.datetime64(self.to_nanoseconds(), "ns")) + "Z" - def to_ns(self) -> IntegralNanosecondsUTC: + def to_nanoseconds(self) -> IntegralNanosecondsUTC: return self.seconds * 1_000_000_000 + self.nanos @classmethod @@ -225,14 +225,14 @@ def from_datetime(cls, dt: datetime) -> Self: return cls(seconds, nanos) @classmethod - def from_integral_nanoseconds(cls, ts: IntegralNanosecondsUTC) -> Self: + def from_nanoseconds(cls, ts: IntegralNanosecondsUTC) -> Self: seconds, nanos = divmod(ts, 1_000_000_000) return cls(seconds, nanos) @classmethod def 
from_flexible(cls, ts: str | datetime | IntegralNanosecondsUTC) -> Self: if isinstance(ts, int): - return cls.from_integral_nanoseconds(ts) + return cls.from_nanoseconds(ts) if isinstance(ts, str): ts = dateutil.parser.parse(ts) return cls.from_datetime(ts) diff --git a/tests/e2e/test_toplevel.py b/tests/e2e/test_toplevel.py index 03c43974..eac2d234 100644 --- a/tests/e2e/test_toplevel.py +++ b/tests/e2e/test_toplevel.py @@ -113,8 +113,8 @@ def test_create_run(): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_ns() - assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_ns() + assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_nanoseconds() + assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_nanoseconds() assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -131,8 +131,8 @@ def test_create_run_csv(csv_data): assert run.rid != "" assert run.name == name assert run.description == desc - assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_ns() - assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_ns() + assert run.start == nm.ts._SecondsNanos.from_datetime(start).to_nanoseconds() + assert run.end == nm.ts._SecondsNanos.from_datetime(end).to_nanoseconds() assert len(run.properties) == 0 assert len(run.labels) == 0 @@ -157,8 +157,8 @@ def test_get_run(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == nm.ts._SecondsNanos.from_flexible(start).to_ns() - assert run2.end == run.end == nm.ts._SecondsNanos.from_flexible(end).to_ns() + assert run2.start == run.start == nm.ts._SecondsNanos.from_flexible(start).to_nanoseconds() + assert run2.end == run.end == nm.ts._SecondsNanos.from_flexible(end).to_nanoseconds() assert run2.properties == run.properties == {} assert run2.labels == run.labels == () @@ -176,8 +176,8 @@ def 
test_search_runs(): assert run2.rid == run.rid != "" assert run2.name == run.name == name assert run2.description == run.description == desc - assert run2.start == run.start == nm.ts._SecondsNanos.from_datetime(start).to_ns() - assert run2.end == run.end == nm.ts._SecondsNanos.from_datetime(end).to_ns() + assert run2.start == run.start == nm.ts._SecondsNanos.from_datetime(start).to_nanoseconds() + assert run2.end == run.end == nm.ts._SecondsNanos.from_datetime(end).to_nanoseconds() assert run2.properties == run.properties == {} assert run2.labels == run.labels == () From b4667d66349cf420cc871814c995a330ea64e33f Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 15:55:48 -0400 Subject: [PATCH 33/51] typing extensions alias, self --- nominal/ts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nominal/ts.py b/nominal/ts.py index 71d998a2..97897771 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -5,11 +5,11 @@ from dataclasses import dataclass from datetime import datetime, timezone from types import MappingProxyType -from typing import Literal, Mapping, NamedTuple, Self, Union +from typing import Literal, Mapping, NamedTuple, Union import dateutil.parser import numpy as np -from typing_extensions import TypeAlias +from typing_extensions import TypeAlias, Self from ._api.combined import ingest_api, scout_run_api From 5eefad266a4b557956ea8555518ea5b5c883f91a Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 15:57:27 -0400 Subject: [PATCH 34/51] check types for all python versions --- justfile | 5 ++++- nominal/ts.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/justfile b/justfile index 746e9128..1206b290 100644 --- a/justfile +++ b/justfile @@ -12,7 +12,10 @@ test-e2e token: # check static typing check-types: - poetry run mypy + poetry run mypy --python-version 3.12 + poetry run mypy --python-version 3.11 + poetry run mypy --python-version 3.10 + poetry run mypy 
--python-version 3.9 # check code formatting | fix with `just fix-format` check-format: diff --git a/nominal/ts.py b/nominal/ts.py index 97897771..a6fe7c85 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -9,7 +9,7 @@ import dateutil.parser import numpy as np -from typing_extensions import TypeAlias, Self +from typing_extensions import Self, TypeAlias from ._api.combined import ingest_api, scout_run_api From de8ce099dac8bb3dfa4a1412f5cb695591690a7d Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 16:11:18 -0400 Subject: [PATCH 35/51] domain -> type --- nominal/core.py | 10 +++++----- nominal/nominal.py | 16 ++++++++-------- nominal/ts.py | 42 +++++++++++++++++++++--------------------- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/nominal/core.py b/nominal/core.py index 7771d4c6..c3064b89 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -29,7 +29,7 @@ from ._multipart import put_multipart_upload from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import IntegralNanosecondsUTC, TypedTimeDomain, _SecondsNanos +from .ts import IntegralNanosecondsUTC, TypedTimestampType, _SecondsNanos __all__ = [ "NominalClient", @@ -237,7 +237,7 @@ def update( update_dataclass(self, dataset, fields=self.__dataclass_fields__) return self - def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: TypedTimeDomain) -> None: + def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: TypedTimestampType) -> None: """Append to a dataset from a csv on-disk.""" path, file_type = _verify_csv_path(path) with open(path, "rb") as csv_file: @@ -247,7 +247,7 @@ def add_to_dataset_from_io( self, dataset: BinaryIO, timestamp_column: str, - timestamp_type: TypedTimeDomain, + timestamp_type: TypedTimestampType, file_type: tuple[str, str] | FileType = FileTypes.CSV, ) -> None: """Append to a 
dataset from a file-like object. @@ -573,7 +573,7 @@ def create_csv_dataset( path: Path | str, name: str | None, timestamp_column: str, - timestamp_type: TypedTimeDomain, + timestamp_type: TypedTimestampType, description: str | None = None, *, labels: Sequence[str] = (), @@ -605,7 +605,7 @@ def create_dataset_from_io( dataset: BinaryIO, name: str, timestamp_column: str, - timestamp_type: TypedTimeDomain, + timestamp_type: TypedTimestampType, file_type: tuple[str, str] | FileType = FileTypes.CSV, description: str | None = None, *, diff --git a/nominal/nominal.py b/nominal/nominal.py index b7bb0575..d0947416 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -52,14 +52,14 @@ def upload_pandas( df: pd.DataFrame, name: str, timestamp_column: str, - timestamp_type: ts._AnyTimeDomain, + timestamp_type: ts._AnyTimestampType, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: """Create a dataset in the Nominal platform from a pandas.DataFrame.""" conn = get_default_client() - time_domain = ts._make_typed_time_domain(timestamp_type) + time_domain = ts._make_typed_timestamp_type(timestamp_type) # TODO(alkasm): use parquet instead of CSV as an intermediary @@ -89,14 +89,14 @@ def upload_polars( df: pl.DataFrame, name: str, timestamp_column: str, - timestamp_type: ts._AnyTimeDomain, + timestamp_type: ts._AnyTimestampType, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: """Create a dataset in the Nominal platform from a polars.DataFrame.""" conn = get_default_client() - time_domain = ts._make_typed_time_domain(timestamp_type) + time_domain = ts._make_typed_timestamp_type(timestamp_type) def write_and_close(df: pl.DataFrame, w: BinaryIO) -> None: df.write_csv(w) @@ -124,7 +124,7 @@ def upload_csv( file: Path | str, name: str | None, timestamp_column: str, - timestamp_type: ts._AnyTimeDomain, + timestamp_type: ts._AnyTimestampType, description: str | None = None, *, wait_until_complete: bool = True, @@ 
-144,12 +144,12 @@ def _upload_csv( file: Path | str, name: str | None, timestamp_column: str, - timestamp_type: ts._AnyTimeDomain, + timestamp_type: ts._AnyTimestampType, description: str | None = None, *, wait_until_complete: bool = True, ) -> Dataset: - time_domain = ts._make_typed_time_domain(timestamp_type) + time_domain = ts._make_typed_timestamp_type(timestamp_type) dataset = conn.create_csv_dataset( file, name, @@ -205,7 +205,7 @@ def create_run_csv( The run start and end times are created from the minimum and maximum timestamps in the CSV file in the timestamp column. """ - time_domain = ts._make_typed_time_domain(timestamp_type) + time_domain = ts._make_typed_timestamp_type(timestamp_type) if not isinstance(time_domain, (ts.Iso8601, ts.Epoch)): raise ValueError( "`create_run_csv()` only supports iso8601 or epoch timestamps: use `upload_dataset()` and `create_run()` instead" diff --git a/nominal/ts.py b/nominal/ts.py index a6fe7c85..b084d263 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -25,7 +25,7 @@ "EPOCH_SECONDS", "EPOCH_MINUTES", "EPOCH_HOURS", - "TypedTimeDomain", + "TypedTimestampType", "IntegralNanosecondsUTC", ] @@ -33,14 +33,14 @@ """A timestamp in nanoseconds since the Unix epoch, UTC.""" -class _ConjureTimestampDomain(abc.ABC): +class _ConjureTimestampType(abc.ABC): @abc.abstractmethod def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: pass @dataclass(frozen=True) -class Iso8601(_ConjureTimestampDomain): +class Iso8601(_ConjureTimestampType): """ISO 8601 timestamp format, e.g. '2021-01-01T00:00:00Z' or '2021-01-01T00:00:00.123+00:00'. The time zone must be specified. """ @@ -50,7 +50,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @dataclass(frozen=True) -class Epoch(_ConjureTimestampDomain): +class Epoch(_ConjureTimestampType): """An absolute timestamp in numeric format representing time since some epoch. The timestamp can be integral or floating point, e.g. 1612137600.123 for 2021-02-01T00:00:00.123Z. 
""" @@ -63,7 +63,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @dataclass(frozen=True) -class Relative(_ConjureTimestampDomain): +class Relative(_ConjureTimestampType): """A relative timestamp in numeric format representing time since some start time. The relative timestamp can be integral or floating point, e.g. 12.123 for 12 seconds and 123 milliseconds after start. The start time is absolute timestamp format representing time since some epoch. @@ -87,7 +87,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @dataclass(frozen=True) -class Custom(_ConjureTimestampDomain): +class Custom(_ConjureTimestampType): """A custom timestamp format. The custom timestamps are expected to be absolute timestamps. The format string should be in the format of the `DateTimeFormatter` class in Java. @@ -140,28 +140,28 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: "relative_hours", ] -TypedTimeDomain: TypeAlias = Union[Iso8601, Epoch, Relative, Custom] -"""Strongly typed time domain types.""" +TypedTimestampType: TypeAlias = Union[Iso8601, Epoch, Relative, Custom] +"""Strongly typed timestamp types.""" -_AnyTimeDomain: TypeAlias = Union[TypedTimeDomain, _LiteralAbsolute, _LiteralRelativeDeprecated] -"""All allowable time domain types, including string representations.""" +_AnyTimestampType: TypeAlias = Union[TypedTimestampType, _LiteralAbsolute, _LiteralRelativeDeprecated] +"""All allowable timestamp types, including string representations.""" -def _make_typed_time_domain(domain: _AnyTimeDomain) -> TypedTimeDomain: - if isinstance(domain, (Iso8601, Epoch, Relative, Custom)): - return domain - if not isinstance(domain, str): - raise TypeError(f"timestamp type {domain} must be a string or an instance of one of: {TypedTimeDomain}") - if domain.startswith("relative_"): +def _make_typed_timestamp_type(type_: _AnyTimestampType) -> TypedTimestampType: + if isinstance(type_, (Iso8601, Epoch, Relative, Custom)): + return type_ + if not 
isinstance(type_, str): + raise TypeError(f"timestamp type {type_} must be a string or an instance of one of: {TypedTimestampType}") + if type_.startswith("relative_"): # until this is completely removed, we implicitly assume offset=1970-01-01 in the APIs warnings.warn( - "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.timedomain.Relative` instead. " - "for example: instead of 'relative_seconds', use `nm.timedomain.Relative('seconds', start=datetime.now())`. ", + "specifying 'relative_{unit}' as a string is deprecated and will be removed in a future version: use `nm.ts.Relative` instead. " + "for example: instead of 'relative_seconds', use `nm.ts.Relative('seconds', start=datetime.now())`. ", UserWarning, ) - if domain not in _str_to_type: - raise ValueError(f"string time domains must be one of: {_str_to_type.keys()}") - return _str_to_type[domain] + if type_ not in _str_to_type: + raise ValueError(f"string timestamp types must be one of: {_str_to_type.keys()}") + return _str_to_type[type_] def _time_unit_to_conjure(unit: _LiteralTimeUnit) -> ingest_api.TimeUnit: From ee1b3685b1da711ba94719c0492ef2b4ca8d660f Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 16:15:40 -0400 Subject: [PATCH 36/51] expand core to string types --- nominal/core.py | 14 +++++++------- nominal/nominal.py | 17 +++++++---------- nominal/ts.py | 2 +- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/nominal/core.py b/nominal/core.py index c3064b89..0edfe4f2 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -29,7 +29,7 @@ from ._multipart import put_multipart_upload from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import IntegralNanosecondsUTC, TypedTimestampType, _SecondsNanos +from .ts import IntegralNanosecondsUTC, _AnyTimestampType, _to_typed_timestamp_type, _SecondsNanos __all__ = [ 
"NominalClient", @@ -237,7 +237,7 @@ def update( update_dataclass(self, dataset, fields=self.__dataclass_fields__) return self - def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: TypedTimestampType) -> None: + def add_csv_to_dataset(self, path: Path | str, timestamp_column: str, timestamp_type: _AnyTimestampType) -> None: """Append to a dataset from a csv on-disk.""" path, file_type = _verify_csv_path(path) with open(path, "rb") as csv_file: @@ -247,7 +247,7 @@ def add_to_dataset_from_io( self, dataset: BinaryIO, timestamp_column: str, - timestamp_type: TypedTimestampType, + timestamp_type: _AnyTimestampType, file_type: tuple[str, str] | FileType = FileTypes.CSV, ) -> None: """Append to a dataset from a file-like object. @@ -274,7 +274,7 @@ def add_to_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=timestamp_type._to_conjure_ingest_api(), + timestamp_type=_to_typed_timestamp_type(timestamp_type)._to_conjure_ingest_api(), ), ), ) @@ -573,7 +573,7 @@ def create_csv_dataset( path: Path | str, name: str | None, timestamp_column: str, - timestamp_type: TypedTimestampType, + timestamp_type: _AnyTimestampType, description: str | None = None, *, labels: Sequence[str] = (), @@ -605,7 +605,7 @@ def create_dataset_from_io( dataset: BinaryIO, name: str, timestamp_column: str, - timestamp_type: TypedTimestampType, + timestamp_type: _AnyTimestampType, file_type: tuple[str, str] | FileType = FileTypes.CSV, description: str | None = None, *, @@ -645,7 +645,7 @@ def create_dataset_from_io( source_metadata=ingest_api.IngestSourceMetadata( timestamp_metadata=ingest_api.TimestampMetadata( series_name=timestamp_column, - timestamp_type=timestamp_type._to_conjure_ingest_api(), + timestamp_type=_to_typed_timestamp_type(timestamp_type)._to_conjure_ingest_api(), ), ), ) diff --git a/nominal/nominal.py b/nominal/nominal.py index 
d0947416..2b9db4ee 100644 --- a/nominal/nominal.py +++ b/nominal/nominal.py @@ -59,7 +59,6 @@ def upload_pandas( ) -> Dataset: """Create a dataset in the Nominal platform from a pandas.DataFrame.""" conn = get_default_client() - time_domain = ts._make_typed_timestamp_type(timestamp_type) # TODO(alkasm): use parquet instead of CSV as an intermediary @@ -75,7 +74,7 @@ def write_and_close(df: pd.DataFrame, w: BinaryIO) -> None: reader, name, timestamp_column=timestamp_column, - timestamp_type=time_domain, + timestamp_type=timestamp_type, file_type=FileTypes.CSV, description=description, ) @@ -96,7 +95,6 @@ def upload_polars( ) -> Dataset: """Create a dataset in the Nominal platform from a polars.DataFrame.""" conn = get_default_client() - time_domain = ts._make_typed_timestamp_type(timestamp_type) def write_and_close(df: pl.DataFrame, w: BinaryIO) -> None: df.write_csv(w) @@ -110,7 +108,7 @@ def write_and_close(df: pl.DataFrame, w: BinaryIO) -> None: reader, name, timestamp_column=timestamp_column, - timestamp_type=time_domain, + timestamp_type=timestamp_type, file_type=FileTypes.CSV, description=description, ) @@ -149,12 +147,11 @@ def _upload_csv( *, wait_until_complete: bool = True, ) -> Dataset: - time_domain = ts._make_typed_timestamp_type(timestamp_type) dataset = conn.create_csv_dataset( file, name, timestamp_column=timestamp_column, - timestamp_type=time_domain, + timestamp_type=timestamp_type, description=description, ) if wait_until_complete: @@ -205,13 +202,13 @@ def create_run_csv( The run start and end times are created from the minimum and maximum timestamps in the CSV file in the timestamp column. 
""" - time_domain = ts._make_typed_timestamp_type(timestamp_type) - if not isinstance(time_domain, (ts.Iso8601, ts.Epoch)): + ts_type = ts._to_typed_timestamp_type(timestamp_type) + if not isinstance(ts_type, (ts.Iso8601, ts.Epoch)): raise ValueError( "`create_run_csv()` only supports iso8601 or epoch timestamps: use `upload_dataset()` and `create_run()` instead" ) - start, end = _get_start_end_timestamp_csv_file(file, timestamp_column, time_domain) - dataset = upload_csv(file, f"Dataset for Run: {name}", timestamp_column, time_domain) + start, end = _get_start_end_timestamp_csv_file(file, timestamp_column, ts_type) + dataset = upload_csv(file, f"Dataset for Run: {name}", timestamp_column, ts_type) run = create_run(name, start=start, end=end, description=description) run.add_dataset("dataset", dataset) return run diff --git a/nominal/ts.py b/nominal/ts.py index b084d263..2fa28c58 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -147,7 +147,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: """All allowable timestamp types, including string representations.""" -def _make_typed_timestamp_type(type_: _AnyTimestampType) -> TypedTimestampType: +def _to_typed_timestamp_type(type_: _AnyTimestampType) -> TypedTimestampType: if isinstance(type_, (Iso8601, Epoch, Relative, Custom)): return type_ if not isinstance(type_, str): From d16efbe9431be59ef7aae5a321c6a7ef91ea79ed Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 16:16:53 -0400 Subject: [PATCH 37/51] fix --- nominal/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nominal/core.py b/nominal/core.py index 0edfe4f2..891eeffb 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -29,7 +29,7 @@ from ._multipart import put_multipart_upload from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import IntegralNanosecondsUTC, _AnyTimestampType, 
_to_typed_timestamp_type, _SecondsNanos +from .ts import IntegralNanosecondsUTC, _AnyTimestampType, _SecondsNanos, _to_typed_timestamp_type __all__ = [ "NominalClient", From d98f573136b02b0e7ddadb757d028991ca0e4701 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 23:40:19 -0400 Subject: [PATCH 38/51] missed a save --- nominal/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nominal/_utils.py b/nominal/_utils.py index 6e0f91da..b3480932 100644 --- a/nominal/_utils.py +++ b/nominal/_utils.py @@ -10,7 +10,7 @@ from typing_extensions import TypeAlias -from .ts import IntegralNanosecondsUTC # typing.TypeAlias in 3.10+ +from .ts import IntegralNanosecondsUTC from ._api.combined import api, ingest_api, scout_run_api From b6db0279c7ff1210a0c9e53045b0a55006996da0 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 23:48:10 -0400 Subject: [PATCH 39/51] add new timestamp type --- nominal/_utils.py | 30 +----------------------------- nominal/core.py | 29 ++++++++--------------------- nominal/ts.py | 12 ++++++++++++ 3 files changed, 21 insertions(+), 50 deletions(-) diff --git a/nominal/_utils.py b/nominal/_utils.py index b3480932..917e50e4 100644 --- a/nominal/_utils.py +++ b/nominal/_utils.py @@ -1,44 +1,19 @@ from __future__ import annotations -from datetime import datetime import logging import mimetypes import os from contextlib import contextmanager from pathlib import Path -from typing import BinaryIO, Iterable, Iterator, Literal, NamedTuple, TypeVar, Union +from typing import BinaryIO, Iterable, Iterator, Literal, NamedTuple, TypeVar from typing_extensions import TypeAlias -from .ts import IntegralNanosecondsUTC - -from ._api.combined import api, ingest_api, scout_run_api - logger = logging.getLogger(__name__) T = TypeVar("T") -def _flexible_time_to_global_conjure_api( - timestamp: datetime | IntegralNanosecondsUTC, -) -> api.Timestamp: - seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) - 
return api.Timestamp(seconds=seconds, nanos=nanos) - - -def _global_conjure_api_to_integral_nanoseconds( - timestamp: api.Timestamp, -) -> IntegralNanosecondsUTC: - return timestamp.seconds * 1_000_000_000 + timestamp.nanos - - -def _flexible_time_to_integral_nanoseconds( - timestamp: datetime | IntegralNanosecondsUTC, -) -> IntegralNanosecondsUTC: - seconds, nanos = _flexible_time_to_seconds_nanos(timestamp) - return seconds * 1_000_000_000 + nanos - - def construct_user_agent_string() -> str: """Constructs a user-agent string with system & Python metadata. E.g.: nominal-python/1.0.0b0 (macOS-14.4-arm64-arm-64bit) cpython/3.12.4 @@ -111,6 +86,3 @@ def reader_writer() -> Iterator[tuple[BinaryIO, BinaryIO]]: finally: w.close() r.close() - - -LogTimestampType: TypeAlias = Literal["absolute", "relative"] diff --git a/nominal/core.py b/nominal/core.py index 938d28af..5f0d11f8 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -8,11 +8,11 @@ from io import TextIOBase from pathlib import Path from types import MappingProxyType -from typing import BinaryIO, Iterable, Iterator, Mapping, Sequence, cast +from typing import BinaryIO, Iterable, Mapping, Sequence, cast import certifi from conjure_python_client import RequestsClient, ServiceConfiguration, SslConfiguration -from typing_extensions import Self # typing.Self in 3.11+ +from typing_extensions import Self from nominal import _config @@ -31,19 +31,8 @@ ) from ._multipart import put_multipart_upload from ._utils import FileType, FileTypes, construct_user_agent_string, update_dataclass -from ._utils import ( - FileType, - FileTypes, - IntegralNanosecondsUTC, - LogTimestampType, - _flexible_time_to_global_conjure_api, - _flexible_time_to_integral_nanoseconds, - _global_conjure_api_to_integral_nanoseconds, - construct_user_agent_string, - update_dataclass, -) from .exceptions import NominalIngestError, NominalIngestFailed -from .ts import IntegralNanosecondsUTC, _AnyTimestampType, _SecondsNanos, _to_typed_timestamp_type 
+from .ts import IntegralNanosecondsUTC, LogTimestampType, _AnyTimestampType, _SecondsNanos, _to_typed_timestamp_type __all__ = [ "NominalClient", @@ -75,16 +64,14 @@ def add_dataset(self, ref_name: str, dataset: Dataset | str) -> None: self.add_datasets({ref_name: dataset}) def add_log_set(self, ref_name: str, log_set: LogSet | str) -> None: - """ - Add a log set to this run. + """Add a log set to this run. Log sets map "ref names" (their name within the run) to a Log set (or log set rid). """ self.add_log_sets({ref_name: log_set}) def add_log_sets(self, log_sets: Mapping[str, LogSet | str]) -> None: - """ - Add multiple log sets to this run. + """Add multiple log sets to this run. Log sets map "ref names" (their name within the run) to a Log set (or log set rid). """ @@ -375,7 +362,7 @@ class Log: def _to_conjure(self) -> datasource_logset_api.Log: return datasource_logset_api.Log( - time=_flexible_time_to_global_conjure_api(self.timestamp), + time=_SecondsNanos.from_nanoseconds(self.timestamp).to_api(), body=datasource_logset_api.LogBody( basic=datasource_logset_api.BasicLogBody(message=self.body, properties={}), ), @@ -385,7 +372,7 @@ def _to_conjure(self) -> datasource_logset_api.Log: def _from_conjure(cls, log: datasource_logset_api.Log) -> Self: if log.body.basic is None: raise RuntimeError(f"unhandled log body type: expected 'basic' but got {log.body.type!r}") - return cls(timestamp=_global_conjure_api_to_integral_nanoseconds(log.time), body=log.body.basic.message) + return cls(timestamp=_SecondsNanos.from_api(log.time).to_nanoseconds(), body=log.body.basic.message) @dataclass(frozen=True) @@ -1008,4 +995,4 @@ def _logs_to_conjure( yield log._to_conjure() elif isinstance(log, tuple): ts, body = log - yield Log(timestamp=_flexible_time_to_integral_nanoseconds(ts), body=body)._to_conjure() + yield Log(timestamp=_SecondsNanos.from_flexible(ts).to_nanoseconds(), body=body)._to_conjure() diff --git a/nominal/ts.py b/nominal/ts.py index 2fa28c58..03ea5d1b 100644 
--- a/nominal/ts.py +++ b/nominal/ts.py @@ -11,6 +11,8 @@ import numpy as np from typing_extensions import Self, TypeAlias +from nominal._api.combined import api + from ._api.combined import ingest_api, scout_run_api __all__ = [ @@ -32,6 +34,8 @@ IntegralNanosecondsUTC: TypeAlias = int """A timestamp in nanoseconds since the Unix epoch, UTC.""" +LogTimestampType: TypeAlias = Literal["absolute", "relative"] + class _ConjureTimestampType(abc.ABC): @abc.abstractmethod @@ -202,6 +206,9 @@ def to_scout_run_api(self) -> scout_run_api.UtcTimestamp: def to_ingest_api(self) -> ingest_api.UtcTimestamp: return ingest_api.UtcTimestamp(seconds_since_epoch=self.seconds, offset_nanoseconds=self.nanos) + def to_api(self) -> api.Timestamp: + return api.Timestamp(seconds=self.seconds, nanos=self.nanos) + def to_iso8601(self) -> str: """datetime.datetime is only microsecond-precise, so we use np.datetime64[ns] to get nanosecond-precision for printing. Note that nanosecond precision is the maximum allowable for conjure datetime fields. 
@@ -217,6 +224,11 @@ def to_nanoseconds(self) -> IntegralNanosecondsUTC: def from_scout_run_api(cls, ts: scout_run_api.UtcTimestamp) -> Self: return cls(seconds=ts.seconds_since_epoch, nanos=ts.offset_nanoseconds or 0) + @classmethod + def from_api(cls, timestamp: api.Timestamp) -> Self: + # TODO(alkasm): warn on pico-second precision loss + return cls(timestamp.seconds, timestamp.nanos) + @classmethod def from_datetime(cls, dt: datetime) -> Self: dt = dt.astimezone(timezone.utc) From ab6a8302d6aa572ec5cbb6bbfcb797879e5eeafc Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 23:49:19 -0400 Subject: [PATCH 40/51] fix e2e test --- tests/e2e/test_core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/e2e/test_core.py b/tests/e2e/test_core.py index f0dd56a1..33a2a88b 100644 --- a/tests/e2e/test_core.py +++ b/tests/e2e/test_core.py @@ -3,7 +3,6 @@ from uuid import uuid4 import nominal as nm -from nominal._utils import _datetime_to_integral_nanoseconds from . 
import _create_random_start_end @@ -133,7 +132,10 @@ def test_create_get_log_set(client: nm.NominalClient): name = f"logset-{uuid4()}" desc = f"core test to create & get a log set {uuid4()}" start, _ = _create_random_start_end() - logs = [(_datetime_to_integral_nanoseconds(start + timedelta(seconds=i)), f"Log message {i}") for i in range(5)] + logs = [ + (nm.ts._SecondsNanos.from_datetime(start + timedelta(seconds=i)).to_nanoseconds(), f"Log message {i}") + for i in range(5) + ] logset = client.create_log_set(name, logs, "absolute", desc) logset2 = nm.get_log_set(logset.rid) From 3bcb63ea5e820ca5a22144905ee82a2003345e02 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Tue, 24 Sep 2024 23:50:35 -0400 Subject: [PATCH 41/51] export LogTimestampType from ts --- nominal/ts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nominal/ts.py b/nominal/ts.py index 03ea5d1b..e36680d9 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -29,6 +29,7 @@ "EPOCH_HOURS", "TypedTimestampType", "IntegralNanosecondsUTC", + "LogTimestampType", ] IntegralNanosecondsUTC: TypeAlias = int From 599878f0ed71293e2d96060b168a43364ed4b2b6 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Wed, 25 Sep 2024 00:04:07 -0400 Subject: [PATCH 42/51] add unit tests for time conversions --- tests/test_ts.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/test_ts.py diff --git a/tests/test_ts.py b/tests/test_ts.py new file mode 100644 index 00000000..5bc30bd3 --- /dev/null +++ b/tests/test_ts.py @@ -0,0 +1,37 @@ +from datetime import datetime +import time + +import pytest +import nominal as nm + + +@pytest.mark.parametrize( + "t", + [ + nm.ts._SecondsNanos.from_nanoseconds(time.time_ns()), + nm.ts._SecondsNanos.from_datetime(datetime.now()), + ], +) +def test_time_conversions(t: nm.ts._SecondsNanos): + assert t.seconds == t.to_nanoseconds() // 1_000_000_000 + assert t.nanos == t.to_nanoseconds() % 1_000_000_000 + assert t == 
t.from_nanoseconds(t.to_nanoseconds()) + + assert t.seconds == t.to_api().seconds + assert t.nanos == t.to_api().nanos + assert t == t.from_api(t.to_api()) + + assert t.seconds == t.to_scout_run_api().seconds_since_epoch + assert t.nanos == t.to_scout_run_api().offset_nanoseconds + assert t == t.from_scout_run_api(t.to_scout_run_api()) + + assert t.seconds == t.to_ingest_api().seconds_since_epoch + assert t.nanos == t.to_ingest_api().offset_nanoseconds + # no from_ingest_api method + + assert t == t.from_flexible(t.to_iso8601()) + assert t == t.from_flexible(t.to_nanoseconds()) + + # datetime objects don't have nanosecond precision + assert t.seconds == int(datetime.fromisoformat(t.to_iso8601()).timestamp()) + assert t.seconds == t.from_flexible(datetime.fromisoformat(t.to_iso8601())).seconds From b0c48d267a14bddabae9fd6d715d2f3ff83ea63f Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Wed, 25 Sep 2024 00:06:34 -0400 Subject: [PATCH 43/51] cleanups --- nominal/_utils.py | 4 +--- tests/test_ts.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/nominal/_utils.py b/nominal/_utils.py index 917e50e4..c83ae486 100644 --- a/nominal/_utils.py +++ b/nominal/_utils.py @@ -5,9 +5,7 @@ import os from contextlib import contextmanager from pathlib import Path -from typing import BinaryIO, Iterable, Iterator, Literal, NamedTuple, TypeVar - -from typing_extensions import TypeAlias +from typing import BinaryIO, Iterable, Iterator, NamedTuple, TypeVar logger = logging.getLogger(__name__) diff --git a/tests/test_ts.py b/tests/test_ts.py index 5bc30bd3..dcfa45ed 100644 --- a/tests/test_ts.py +++ b/tests/test_ts.py @@ -1,7 +1,8 @@ -from datetime import datetime import time +from datetime import datetime import pytest + import nominal as nm From 0fa2d61cbcb46e099827ce4a8ddbb60c529684e1 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 14:34:40 -0400 Subject: [PATCH 44/51] fix e2e tests --- tests/e2e/test_core.py | 9 +++++---- 1 file 
changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/e2e/test_core.py b/tests/e2e/test_core.py index 18ee358f..344636a7 100644 --- a/tests/e2e/test_core.py +++ b/tests/e2e/test_core.py @@ -3,6 +3,7 @@ from uuid import uuid4 import nominal as nm +from nominal.ts import _SecondsNanos from . import _create_random_start_end @@ -35,8 +36,8 @@ def test_update_run(): assert run.description == desc assert len(run.properties) == 0 assert len(run.labels) == 0 - assert run.start == _datetime_to_integral_nanoseconds(start) - assert run.end == _datetime_to_integral_nanoseconds(end) + assert run.start == _SecondsNanos.from_datetime(start).to_nanoseconds() + assert run.end == _SecondsNanos.from_datetime(end).to_nanoseconds() new_name = title + "-updated" new_desc = desc + "-updated" @@ -58,8 +59,8 @@ def test_update_run(): assert run.description == new_desc assert run.properties == new_props assert run.labels == tuple(new_labels) - assert run.start == _datetime_to_integral_nanoseconds(new_start) - assert run.end == _datetime_to_integral_nanoseconds(new_end) + assert run.start == _SecondsNanos.from_datetime(new_start).to_nanoseconds() + assert run.end == _SecondsNanos.from_datetime(new_end).to_nanoseconds() def test_add_dataset_to_run_and_list_datasets(csv_data): From 4ad240a8312c9b4daa09e0f26e7c3c7f665f86f1 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 14:41:13 -0400 Subject: [PATCH 45/51] isoformat strings with Z only supported in python 3.11+ --- tests/test_ts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_ts.py b/tests/test_ts.py index dcfa45ed..763c58f0 100644 --- a/tests/test_ts.py +++ b/tests/test_ts.py @@ -1,6 +1,7 @@ import time from datetime import datetime +import dateutil.parser import pytest import nominal as nm @@ -34,5 +35,5 @@ def test_time_conversions(t: nm.ts._SecondsNanos): assert t == t.from_flexible(t.to_nanoseconds()) # datetime objects don't have nanosecond precision - assert t.seconds 
== int(datetime.fromisoformat(t.to_iso8601()).timestamp()) - assert t.seconds == t.from_flexible(datetime.fromisoformat(t.to_iso8601())).seconds + assert t.seconds == int(dateutil.parser.parse(t.to_iso8601()).timestamp()) + assert t.seconds == t.from_flexible(dateutil.parser.parse(t.to_iso8601())).seconds From e5586c80db8168e39c929573a6052151436a48bb Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 14:45:49 -0400 Subject: [PATCH 46/51] only compare seconds of datetime obj --- tests/test_ts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_ts.py b/tests/test_ts.py index 763c58f0..df4737d6 100644 --- a/tests/test_ts.py +++ b/tests/test_ts.py @@ -31,9 +31,9 @@ def test_time_conversions(t: nm.ts._SecondsNanos): assert t.nanos == t.to_ingest_api().offset_nanoseconds # no from_ingest_api method - assert t == t.from_flexible(t.to_iso8601()) assert t == t.from_flexible(t.to_nanoseconds()) # datetime objects don't have nanosecond precision assert t.seconds == int(dateutil.parser.parse(t.to_iso8601()).timestamp()) assert t.seconds == t.from_flexible(dateutil.parser.parse(t.to_iso8601())).seconds + assert t.seconds == t.from_flexible(t.to_iso8601()).seconds From 534e901b94693ad72977722f6d443eb23d7e3246 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 14:57:05 -0400 Subject: [PATCH 47/51] add check-types-all --- justfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/justfile b/justfile index 1206b290..5067fd55 100644 --- a/justfile +++ b/justfile @@ -12,6 +12,10 @@ test-e2e token: # check static typing check-types: + poetry run mypy + +# check static typing across all supported python versions +check-types-all: poetry run mypy --python-version 3.12 poetry run mypy --python-version 3.11 poetry run mypy --python-version 3.10 From 0f23c7da9066b2d1b131d9609a5a444d1bac36a8 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 14:59:00 -0400 Subject: [PATCH 48/51] remove extra 
utils func --- nominal/_utils.py | 3 --- nominal/core.py | 9 +-------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/nominal/_utils.py b/nominal/_utils.py index a16dc464..0d87e8ef 100644 --- a/nominal/_utils.py +++ b/nominal/_utils.py @@ -88,9 +88,6 @@ def reader_writer() -> Iterator[tuple[BinaryIO, BinaryIO]]: r.close() -LogTimestampType: TypeAlias = Literal["absolute", "relative"] - - Param = ParamSpec("Param") diff --git a/nominal/core.py b/nominal/core.py index 163ba8f2..e163fda6 100644 --- a/nominal/core.py +++ b/nominal/core.py @@ -30,14 +30,7 @@ upload_api, ) from ._multipart import put_multipart_upload -from ._utils import ( - FileType, - FileTypes, - LogTimestampType, - construct_user_agent_string, - deprecate_keyword_argument, - update_dataclass, -) +from ._utils import FileType, FileTypes, construct_user_agent_string, deprecate_keyword_argument, update_dataclass from .exceptions import NominalIngestError, NominalIngestFailed, NominalIngestMultiError from .ts import IntegralNanosecondsUTC, LogTimestampType, _AnyTimestampType, _SecondsNanos, _to_typed_timestamp_type From 1b104f30cfec31d4d49eb1d993cea50caaecd825 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 15:01:20 -0400 Subject: [PATCH 49/51] add javadoc format reference. --- nominal/ts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nominal/ts.py b/nominal/ts.py index e36680d9..d0e77008 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -96,6 +96,7 @@ class Custom(_ConjureTimestampType): """A custom timestamp format. The custom timestamps are expected to be absolute timestamps. The format string should be in the format of the `DateTimeFormatter` class in Java. 
+ Ref: https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/time/format/DateTimeFormatter.html#patterns """ format: str From 656ea44d3eeb787cce1573afb4a67558709fa753 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 18:46:23 -0400 Subject: [PATCH 50/51] add docs and many e2e tests --- nominal/ts.py | 192 ++++++++++++++++++++++++++++++++++- tests/e2e/test_timestamps.py | 152 +++++++++++++++++++++++++++ 2 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 tests/e2e/test_timestamps.py diff --git a/nominal/ts.py b/nominal/ts.py index d0e77008..b3ac16b9 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -1,3 +1,193 @@ +"""Timestamp types for Nominal. + +The `nominal.ts` module provides timestamp format specifications and utilities. + +When you _upload_ a dataset to nominal, the dataset may have timestamps in a variety of formats. For example: +- ISO 8601 strings like '2021-01-31T19:00:00Z' +- Epoch timestamps in floating-point seconds since epoch like 1612137600.123 +- Epoch timestamps in integer nanoseconds since epoch like 1612137600123000000 +- Relative timestamps like 12.123 for 12 seconds and 123 milliseconds after some start time +- Various other string timestamp formats, e.g. 'Sun Jan 31 19:00:00 2021' + +All of these may also have different interpretations of the units, epoch, time zone, etc. + +To simplify common usages while allowing for the full flexibility of the Nominal platform, +the client library typically allows you to specify timestamp formats in typeful representations +as well as simple strings for the common/unparameterized cases. 
+ +Wherever you can specify a timestamp format (typically the `timestamp_type` parameter), any of the following can be used: + +```python +"iso_8601" +"epoch_nanoseconds" +"epoch_microseconds" +"epoch_milliseconds" +"epoch_seconds" +"epoch_minutes" +"epoch_hours" +nm.ts.Iso8601() +nm.ts.Epoch("nanoseconds") +nm.ts.Epoch("microseconds") +nm.ts.Epoch("milliseconds") +nm.ts.Epoch("seconds") +nm.ts.Epoch("minutes") +nm.ts.Epoch("hours") +nm.ts.Relative("nanoseconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) +nm.ts.Relative("microseconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) +nm.ts.Relative("milliseconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) +nm.ts.Relative("seconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) +nm.ts.Relative("minutes", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) +nm.ts.Relative("hours", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) +nm.ts.Custom(r"yyyy-MM-dd[T]hh:mm:ss") +nm.ts.Custom(r"MM-dd[T]hh:mm:ss", default_year=2024) +``` + +The strings "iso_8601" and "epoch_{unit}" are the simplest forms to specify. The `nm.ts.Iso8601()` and `nm.ts.Epoch("{unit}")` classes are the typeful representations of these formats. + +Relative and custom formats require additional parameters, and so they don't have as simple of a string representation. +Relative timestamps require a start time that they are relative to, e.g. `nm.ts.Relative("{unit}", start=start_time)`. +Custom timestamp formats require a format string compatible with the `DateTimeFormatter` class in Java: see https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/time/format/DateTimeFormatter.html#patterns. + +## Examples + +All of the examples use the same data (timestamp and value) expressed with different timestamp formats, and showcase how to upload them to Nominal. + +### ISO 8601 + +Nominal requires ISO 8601 timestamps to include the time zone, e.g. '2021-01-31T19:00:00Z' or '2021-01-31T19:00:00.123+00:00'. 
For example: + +```csv +temperature,timestamp +20,2024-09-30T16:37:36.891349Z +21,2024-09-30T16:37:36.990262Z +22,2024-09-30T16:37:37.089310Z +19,2024-09-30T16:37:37.190015Z +23,2024-09-30T16:37:37.289585Z +22,2024-09-30T16:37:37.388941Z +28,2024-09-30T16:37:37.491115Z +24,2024-09-30T16:37:37.590826Z +``` + +```python +nm.upload_csv("temperature.csv", "Exterior Temps", "timestamp", + timestamp_type="iso_8601" # or nm.ts.Iso8601() +) +``` + +### Epoch timestamps + +Nominal supports epoch timestamps in various units, e.g. seconds, milliseconds, nanoseconds, etc. Additionally, the types can be integers or floating-point numbers. + +#### Floating-point seconds since epoch + +```csv +temperature,timestamp +20,1727728656.891349 +21,1727728656.990262 +22,1727728657.08931 +19,1727728657.190015 +23,1727728657.289585 +22,1727728657.388941 +28,1727728657.491115 +24,1727728657.590826 +``` + +```python +nm.upload_csv("temperature.csv", "Exterior Temps", "timestamp", + timestamp_type="epoch_seconds" # or nm.ts.Epoch("seconds") +) +``` + +#### Integer nanoseconds since epoch + +```csv +temperature,timestamp +20,1727728656891349000 +21,1727728656990262000 +22,1727728657089310000 +19,1727728657190015000 +23,1727728657289585000 +22,1727728657388941000 +28,1727728657491115000 +24,1727728657590826000 +``` + +```python +nm.upload_csv("temperature.csv", "Exterior Temps", "timestamp", + timestamp_type="epoch_nanoseconds" # or nm.ts.Epoch("nanoseconds") +) +``` + +### Relative timestamps + +Similar to epoch timestamps, Nominal supports relative timestamps in various units, e.g. seconds, milliseconds, nanoseconds, etc and can be integers or floating-point numbers. +Relative timestamps must be relative to a specified start time. 
+ +```csv +temperature,timestamp +20,0 +21,98913 +22,197961 +19,298666 +23,398236 +22,497592 +28,599766 +24,699477 +``` + +```python +nm.upload_csv("temperature.csv", "Exterior Temps", "timestamp", + timestamp_type=nm.ts.Relative("microseconds", start=datetime.fromtimestamp(1727728656.891349)) +) +``` + +### Custom Format + +#### Customized ctime + +This is an example of a custom time format--the output is like `ctime()`, except with microsecond precision added. + +```csv +temperature,timestamp +20,Mon Sep 30 16:37:36.891349 2024 +21,Mon Sep 30 16:37:36.990262 2024 +22,Mon Sep 30 16:37:37.089310 2024 +19,Mon Sep 30 16:37:37.190015 2024 +23,Mon Sep 30 16:37:37.289585 2024 +22,Mon Sep 30 16:37:37.388941 2024 +28,Mon Sep 30 16:37:37.491115 2024 +24,Mon Sep 30 16:37:37.590826 2024 +``` + +```python +nm.upload_csv("temperature.csv", "Exterior Temps", "timestamp", + timestamp_type=nm.ts.Custom("EEE MMM dd HH:mm:ss.SSSSSS yyyy") +) +``` + +#### IRIG time code + +IRIG time codes come in a variety of formats. A common format specifies a relative timestamp from the beginning of the year, expressed in days:hours:minutes:seconds.milliseconds. + +```csv +temperature,timestamp +20,274:16:37:36.891349 +21,274:16:37:36.990262 +22,274:16:37:37.089310 +19,274:16:37:37.190015 +23,274:16:37:37.289585 +22,274:16:37:37.388941 +28,274:16:37:37.491115 +24,274:16:37:37.590826 +``` + +```python +nm.upload_csv("temperature.csv", "Exterior Temps", "timestamp", + timestamp_type=nm.ts.Custom(r"DDD:HH:mm:ss.SSSSSS", default_year=2024) +) +``` +""" + from __future__ import annotations import abc @@ -46,7 +236,7 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: @dataclass(frozen=True) class Iso8601(_ConjureTimestampType): - """ISO 8601 timestamp format, e.g. '2021-01-01T00:00:00Z' or '2021-01-01T00:00:00.123+00:00'. + """ISO 8601 timestamp format, e.g. '2021-01-31T19:00:00Z' or '2021-01-31T19:00:00.123+00:00'. The time zone must be specified. 
""" diff --git a/tests/e2e/test_timestamps.py b/tests/e2e/test_timestamps.py new file mode 100644 index 00000000..438c1b51 --- /dev/null +++ b/tests/e2e/test_timestamps.py @@ -0,0 +1,152 @@ +from datetime import datetime, timedelta +from typing import Callable +from unittest import mock +from uuid import uuid4 + +import pytest + +import nominal as nm + + +@pytest.fixture(scope="module") +def temperature_data() -> list[tuple[int, datetime]]: + return [ + (20, datetime.fromisoformat("2024-09-30T16:37:36.891349")), + (21, datetime.fromisoformat("2024-09-30T16:37:36.990262")), + (22, datetime.fromisoformat("2024-09-30T16:37:37.089310")), + (19, datetime.fromisoformat("2024-09-30T16:37:37.190015")), + (23, datetime.fromisoformat("2024-09-30T16:37:37.289585")), + (22, datetime.fromisoformat("2024-09-30T16:37:37.388941")), + (28, datetime.fromisoformat("2024-09-30T16:37:37.491115")), + (24, datetime.fromisoformat("2024-09-30T16:37:37.590826")), + ] + + +def _create_csv_data(data: list[tuple[int, datetime]], formatter: Callable[[int, datetime], str]) -> bytes: + return ("temperature,timestamp\n" + "\n".join(formatter(temp, ts) for temp, ts in data)).encode() + + +# def test_iso_8601_str(request, temperature_data): +# nm.upload_csv("temperature.csv", "Exterior Temps", timestamp_column="timestamp", timestamp_type="iso_8601") +# nm.upload_csv("temperature.csv", "Exterior Temps", timestamp_column="timestamp", timestamp_type=nm.ts.Iso8601()) + + +def test_iso_8601_str(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + + csv_data = _create_csv_data(temperature_data, lambda temp, ts: f"{temp},{ts.isoformat()}Z") + + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv("fake_path.csv", name, "timestamp", "iso_8601", desc) + ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc + + +def 
test_iso_8601_typed(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + + csv_data = _create_csv_data(temperature_data, lambda temp, ts: f"{temp},{ts.isoformat()}Z") + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv("fake_path.csv", name, "timestamp", nm.ts.ISO_8601, desc) + ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc + + +def test_epoch_seconds_str(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + + csv_data = _create_csv_data(temperature_data, lambda temp, ts: f"{temp},{ts.timestamp()}") + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv("fake_path.csv", name, "timestamp", "epoch_seconds", desc) + ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc + + +def test_epoch_seconds_typed(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + + csv_data = _create_csv_data(temperature_data, lambda temp, ts: f"{temp},{ts.timestamp()}") + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv("fake_path.csv", name, "timestamp", nm.ts.EPOCH_SECONDS, desc) + ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc + + +def test_epoch_milliseconds_str(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + + csv_data = _create_csv_data(temperature_data, lambda temp, ts: f"{temp},{ts.timestamp() * 1000}") + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv("fake_path.csv", name, "timestamp", "epoch_milliseconds", desc) + 
ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc + + +def test_relative_microseconds(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + start = temperature_data[0][1] + + def fmt(temp: int, ts: datetime) -> str: + delta = ts - start + micros = 1_000_000 * int(delta.total_seconds()) + delta.microseconds + return f"{temp},{micros}" + + csv_data = _create_csv_data(temperature_data, fmt) + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv("fake_path.csv", name, "timestamp", nm.ts.Relative("microseconds", start), desc) + ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc + + +def test_custom_ctime(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + + def fmt(temp: int, ts: datetime) -> str: + ctime = ts.ctime() + ctime = ctime[:-5] + f".{ts.microsecond:06d}" + ctime[-5:] + return f"{temp},{ctime}" + + csv_data = _create_csv_data(temperature_data, fmt) + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv("fake_path.csv", name, "timestamp", nm.ts.Custom(r"EEE MMM dd HH:mm:ss.SSSSSS yyyy"), desc) + ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc + + +def test_custom_irig(request, temperature_data): + name = f"dataset-{uuid4()}" + desc = f"timestamp test {request.node.name} {uuid4()}" + + def fmt(temp: int, ts: datetime) -> str: + return f"{temp},{ts.strftime(r'%j:%H:%M:%S.%f')}" + + csv_data = _create_csv_data(temperature_data, fmt) + with mock.patch("builtins.open", mock.mock_open(read_data=csv_data)): + ds = nm.upload_csv( + "fake_path.csv", name, "timestamp", nm.ts.Custom(r"DDD:HH:mm:ss.SSSSSS", default_year=2024), desc + ) + 
ds.poll_until_ingestion_completed(interval=timedelta(seconds=0.1)) + + assert ds.name == name + assert ds.description == desc From a07040c860e0b2e6f363aadf942a224acb887793 Mon Sep 17 00:00:00 2001 From: Alexander Reynolds Date: Mon, 30 Sep 2024 19:07:10 -0400 Subject: [PATCH 51/51] clean up ts docs --- docs/reference/ts.md | 1 + mkdocs.yml | 1 + nominal/ts.py | 45 ++++++++++++++++++++------------------------ 3 files changed, 22 insertions(+), 25 deletions(-) create mode 100644 docs/reference/ts.md diff --git a/docs/reference/ts.md b/docs/reference/ts.md new file mode 100644 index 00000000..ad5b2871 --- /dev/null +++ b/docs/reference/ts.md @@ -0,0 +1 @@ +::: nominal.ts diff --git a/mkdocs.yml b/mkdocs.yml index ebbd8d68..b933921b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -23,6 +23,7 @@ nav: - Examples: usage/examples.md - Reference: - High-level SDK: reference/toplevel.md + - Timestamps: reference/ts.md - Core SDK: reference/core.md - Exceptions: reference/exceptions.md - nom cli: reference/nom-cli.md diff --git a/nominal/ts.py b/nominal/ts.py index b3ac16b9..dd607c03 100644 --- a/nominal/ts.py +++ b/nominal/ts.py @@ -1,8 +1,7 @@ -"""Timestamp types for Nominal. - -The `nominal.ts` module provides timestamp format specifications and utilities. +"""The `nominal.ts` module provides timestamp format specifications and utilities. When you _upload_ a dataset to nominal, the dataset may have timestamps in a variety of formats. For example: + - ISO 8601 strings like '2021-01-31T19:00:00Z' - Epoch timestamps in floating-point seconds since epoch like 1612137600.123 - Epoch timestamps in integer nanoseconds since epoch like 1612137600123000000 @@ -12,10 +11,9 @@ All of these may also have different interpretations of the units, epoch, time zone, etc. 
To simplify common usages while allowing for the full flexibility of the Nominal platform, -the client library typically allows you to specify timestamp formats in typeful representations -as well as simple strings for the common/unparameterized cases. +the client library allows you to specify timestamp formats with simple strings and more complex typeful representations. -Wherever you can specify a timestamp format (typically the `timestamp_type` parameter), any of the following can be used: +Wherever you can specify a timestamp format (typically the `timestamp_type` parameter), these are all examples of valid formats: ```python "iso_8601" @@ -26,27 +24,22 @@ "epoch_minutes" "epoch_hours" nm.ts.Iso8601() -nm.ts.Epoch("nanoseconds") nm.ts.Epoch("microseconds") -nm.ts.Epoch("milliseconds") nm.ts.Epoch("seconds") -nm.ts.Epoch("minutes") nm.ts.Epoch("hours") nm.ts.Relative("nanoseconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) -nm.ts.Relative("microseconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) nm.ts.Relative("milliseconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) nm.ts.Relative("seconds", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) nm.ts.Relative("minutes", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) -nm.ts.Relative("hours", start=datetime.fromisoformat("2021-01-31T19:00:00Z")) nm.ts.Custom(r"yyyy-MM-dd[T]hh:mm:ss") -nm.ts.Custom(r"MM-dd[T]hh:mm:ss", default_year=2024) +nm.ts.Custom(r"DDD:HH:mm:ss.SSSSSS", default_year=2024) ``` -The strings "iso_8601" and "epoch_{unit}" are the simplest forms to specify. The `nm.ts.Iso8601()` and `nm.ts.Epoch("{unit}")` classes are the typeful representations of these formats. +The strings `"iso_8601"` and `"epoch_{unit}"` are equivalent to using the types `nm.ts.Iso8601()` and `nm.ts.Epoch("{unit}")`. -Relative and custom formats require additional parameters, and so they don't have as simple of a string representation. 
+Relative and custom formats require additional parameters, so they can't be specified with a string. Relative timestamps require a start time that they are relative to, e.g. `nm.ts.Relative("{unit}", start=start_time)`. -Custom timestamp formats require a format string compatible with the `DateTimeFormatter` class in Java: see https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/time/format/DateTimeFormatter.html#patterns. +Custom timestamp formats require a format string compatible with the `DateTimeFormatter` class in Java: see [java docs](https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/time/format/DateTimeFormatter.html#patterns). ## Examples @@ -54,7 +47,7 @@ ### ISO 8601 -Nominal requires ISO 8601 timestamps to include the time zone, e.g. '2021-01-31T19:00:00Z' or '2021-01-31T19:00:00.123+00:00'. For example: +Nominal requires ISO 8601 timestamps to include the time zone, e.g. `'2021-01-31T19:00:00Z'` or `'2021-01-31T19:00:00.123+00:00'`. For example: ```csv temperature,timestamp @@ -76,7 +69,7 @@ ### Epoch timestamps -Nominal supports epoch timestamps in various units, e.g. seconds, milliseconds, nanoseconds, etc. Additionally, the types can be integers or floating-point numbers. +Nominal supports epoch timestamps in different units: hours, minutes, seconds, milliseconds, microseconds, and nanoseconds. The values can be integers or floating-point numbers. #### Floating-point seconds since epoch @@ -120,8 +113,8 @@ ### Relative timestamps -Similar to epoch timestamps, Nominal supports relative timestamps in various units, e.g. seconds, milliseconds, nanoseconds, etc and can be integers or floating-point numbers. -Relative timestamps must be relative to a specified start time. +Similar to epoch timestamps, Nominal supports relative timestamps in the same units: hours, minutes, seconds, milliseconds, microseconds, and nanoseconds, and can be integer or floating-point values. 
+Relative timestamps are _relative to_ a specified start time. ```csv temperature,timestamp @@ -143,9 +136,11 @@ ### Custom Format +Nominal supports custom timestamp formats. The format string should be in the format of the `DateTimeFormatter` class in Java: see [java docs](https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/time/format/DateTimeFormatter.html#patterns). + #### Customized ctime -This is an example of a custom time format--the output is like `ctime()`, except with microsecond precision added. +This time format is similar to the string format from `ctime()`, except with microsecond precision added. ```csv temperature,timestamp @@ -167,7 +162,7 @@ #### IRIG time code -IRIG time codes come in a variety of formats. A common format specifies a relative timestamp from the beginning of the year, expressed in days:hours:minutes:seconds.milliseconds. +IRIG time codes come in a variety of formats. A common IRIG format specifies a relative timestamp from the beginning of the year, expressed in `days:hours:minutes:seconds.ms`. ```csv temperature,timestamp @@ -223,7 +218,7 @@ ] IntegralNanosecondsUTC: TypeAlias = int -"""A timestamp in nanoseconds since the Unix epoch, UTC.""" +"""Alias for an `int` used in the code for documentation purposes. 
This value is a timestamp in nanoseconds since the Unix epoch, UTC.""" LogTimestampType: TypeAlias = Literal["absolute", "relative"] @@ -292,7 +287,7 @@ class Custom(_ConjureTimestampType): format: str """Must be in the format of the `DateTimeFormatter` class in Java.""" default_year: int | None = None - """Accepted as an optional field for cases like IRIG time format, where the year is not present.""" + """Accepted as an optional field for cases like IRIG time codes, where the year is not present.""" def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: fmt = ingest_api.CustomTimestamp(format=self.format, default_year=self.default_year) @@ -337,10 +332,10 @@ def _to_conjure_ingest_api(self) -> ingest_api.TimestampType: ] TypedTimestampType: TypeAlias = Union[Iso8601, Epoch, Relative, Custom] -"""Strongly typed timestamp types.""" +"""Type alias for all of the strongly typed timestamp types.""" _AnyTimestampType: TypeAlias = Union[TypedTimestampType, _LiteralAbsolute, _LiteralRelativeDeprecated] -"""All allowable timestamp types, including string representations.""" +"""Type alias for all of the allowable timestamp types, including string representations.""" def _to_typed_timestamp_type(type_: _AnyTimestampType) -> TypedTimestampType: