diff --git a/README.md b/README.md index 118b2152..2a0dd993 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,8 @@ You can log configurations or other single values. Pass the metadata as a dictio For example, `{"parameters/learning_rate": 0.001}`. In the field path, each forward slash `/` nests the field under a namespace. Use namespaces to structure the metadata into meaningful categories. +Any `datetime` values that don't have the `tzinfo` attribute set are assumed to be in the local timezone. + __Parameters__ | Name | Type | Default | Description | @@ -265,7 +267,7 @@ __Parameters__ |-------------|------------------------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `data` | `Dict[str, Union[float, int]]` | `None` | Dictionary of metrics to log. Each metric value is associated with a step. To log multiple metrics at once, pass multiple key-value pairs. | | `step` | `Union[float, int]` | `None` | Index of the log entry. Must be increasing.
**Tip:** Using float rather than int values can be useful, for example, when logging substeps in a batch. | -| `timestamp` | `datetime`, optional | `None` | Time of logging the metadata. | +| `timestamp` | `datetime`, optional | `None` | Time of logging the metadata. If not provided, the current time is used. If provided, and `timestamp.tzinfo` is not set, the time is assumed to be in the local timezone. | __Examples__ diff --git a/src/neptune_scale/__init__.py b/src/neptune_scale/__init__.py index d6c045c6..e4796645 100644 --- a/src/neptune_scale/__init__.py +++ b/src/neptune_scale/__init__.py @@ -14,7 +14,10 @@ import threading import time from contextlib import AbstractContextManager -from datetime import datetime +from datetime import ( + datetime, + timezone, +) from multiprocessing.sharedctypes import Synchronized from multiprocessing.synchronize import Condition as ConditionT from typing import ( @@ -52,7 +55,10 @@ datetime_to_proto, make_step, ) -from neptune_scale.core.util import safe_signal_name +from neptune_scale.core.util import ( + ensure_utc, + safe_signal_name, +) from neptune_scale.core.validation import ( verify_collection_type, verify_max_length, @@ -429,7 +435,8 @@ def log_metrics( To log multiple metrics at once, pass multiple key-value pairs. step: Index of the log entry. Must be increasing. Tip: Using float rather than int values can be useful, for example, when logging substeps in a batch. - timestamp (optional): Time of logging the metadata. + timestamp (optional): Time of logging the metadata. If not provided, the current time is used. If provided, + and `timestamp.tzinfo` is not set, the time is assumed to be in the local timezone. Examples: @@ -462,6 +469,8 @@ def log_configs(self, data: Optional[Dict[str, Union[float, bool, int, str, date data: Dictionary of configs or other values to log. Available types: float, integer, Boolean, string, and datetime. + Any `datetime` values that don't have the `tzinfo` attribute set are assumed to be in the local timezone. + Example: ``` from neptune_scale import Run @@ -540,7 +549,7 @@ def log( verify_type("tags_add", tags_add, (dict, type(None))) verify_type("tags_remove", tags_remove, (dict, type(None))) - timestamp = datetime.now() if timestamp is None else timestamp + timestamp = datetime.now(tz=timezone.utc) if timestamp is None else ensure_utc(timestamp) configs = {} if configs is None else configs metrics = {} if metrics is None else metrics tags_add = {} if tags_add is None else tags_add @@ -568,7 +577,7 @@ def log( run_id=self._run_id, step=step, timestamp=timestamp, - fields=configs, + configs=configs, metrics=metrics, add_tags=tags_add, remove_tags=tags_remove, diff --git a/src/neptune_scale/core/metadata_splitter.py b/src/neptune_scale/core/metadata_splitter.py index c2018926..87fb681e 100644 --- a/src/neptune_scale/core/metadata_splitter.py +++ b/src/neptune_scale/core/metadata_splitter.py @@ -43,7 +43,7 @@ def __init__( run_id: str, step: Optional[Union[int, float]], timestamp: datetime, - fields: Dict[str, Union[float, bool, int, str, datetime]], + configs: Dict[str, Union[float, bool, int, str, datetime]], metrics: Dict[str, float], add_tags: Dict[str, Union[List[str], Set[str]]], remove_tags: Dict[str, Union[List[str], Set[str]]], @@ -53,7 +53,7 @@ def __init__( self._timestamp = datetime_to_proto(timestamp) self._project = project self._run_id = run_id - self._fields = peekable(fields.items()) + self._configs = peekable(configs.items()) self._metrics = peekable(starmap(lambda k, v: (k, float(v)), metrics.items())) self._add_tags = peekable(add_tags.items()) self._remove_tags = peekable(remove_tags.items()) @@ -84,7 +84,7 @@ def __next__(self) -> Tuple[RunOperation, int]: size = update.ByteSize() size = self.populate( - assets=self._fields, + assets=self._configs, update_producer=lambda key, value: update.assign[key].MergeFrom(value), size=size, ) diff --git a/src/neptune_scale/core/serialization.py b/src/neptune_scale/core/serialization.py index 13babac7..cc8336ed 100644 --- a/src/neptune_scale/core/serialization.py +++ b/src/neptune_scale/core/serialization.py @@ -21,6 +21,8 @@ Value, ) +from .util import ensure_utc + def make_value(value: Union[Value, float, str, int, bool, datetime, List[str], Set[str]]) -> Value: if isinstance(value, Value): @@ -43,7 +45,7 @@ def make_value(value: Union[Value, float, str, int, bool, datetime, List[str], S def datetime_to_proto(dt: datetime) -> Timestamp: - dt_ts = dt.timestamp() + dt_ts = ensure_utc(dt).timestamp() return Timestamp(seconds=int(dt_ts), nanos=int((dt_ts % 1) * 1e9)) diff --git a/src/neptune_scale/core/util.py b/src/neptune_scale/core/util.py index 60fe4b0b..34ffc92f 100644 --- a/src/neptune_scale/core/util.py +++ b/src/neptune_scale/core/util.py @@ -1,4 +1,8 @@ import signal +from datetime import ( + datetime, + timezone, +) def safe_signal_name(signum: int) -> str: @@ -8,3 +12,12 @@ def safe_signal_name(signum: int) -> str: signame = str(signum) return signame + + +def ensure_utc(dt: datetime) -> datetime: + """If `dt` has no TZ info, assume it's local time, and convert to UTC. Otherwise return as is.""" + + if dt.tzinfo is None: + return dt.astimezone(timezone.utc) + + return dt diff --git a/tests/unit/test_metadata_splitter.py b/tests/unit/test_metadata_splitter.py index 6fe5691f..86a16dd1 100644 --- a/tests/unit/test_metadata_splitter.py +++ b/tests/unit/test_metadata_splitter.py @@ -1,4 +1,7 @@ -from datetime import datetime +from datetime import ( + datetime, + timezone, +) from freezegun import freeze_time from google.protobuf.timestamp_pb2 import Timestamp @@ -23,8 +26,8 @@ def test_empty(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), - fields={}, + timestamp=datetime.now(timezone.utc), + configs={}, metrics={}, add_tags={}, remove_tags={}, @@ -50,13 +53,13 @@ def test_fields(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), - fields={ + timestamp=datetime.now(timezone.utc), + configs={ "some/string": "value", "some/int": 2501, "some/float": 3.14, "some/bool": True, - "some/datetime": datetime.now(), + "some/datetime": datetime.now(timezone.utc), "some/tags": {"tag1", "tag2"}, }, metrics={}, @@ -94,8 +97,8 @@ def test_metrics(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), - fields={}, + timestamp=datetime.now(timezone.utc), + configs={}, metrics={ "some/metric": 3.14, }, @@ -128,8 +131,8 @@ def test_tags(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), - fields={}, + timestamp=datetime.now(timezone.utc), + configs={}, metrics={}, add_tags={ "some/tags": {"tag1", "tag2"}, @@ -174,7 +177,7 @@ def test_tags(): def test_splitting(): # given max_size = 1024 - timestamp = datetime.now() + timestamp = datetime.now(timezone.utc) metrics = {f"metric{v}": 7 / 9.0 * v for v in range(1000)} fields = {f"field{v}": v for v in range(1000)} add_tags = {f"add/tag{v}": {f"value{v}"} for v in range(1000)} @@ -186,7 +189,7 @@ def test_splitting(): run_id="run_id", step=1, timestamp=timestamp, - fields=fields, + configs=fields, metrics=metrics, add_tags=add_tags, remove_tags=remove_tags, @@ -220,7 +223,7 @@ def test_splitting(): def test_split_large_tags(): # given max_size = 1024 - timestamp = datetime.now() + timestamp = datetime.now(timezone.utc) metrics = {} fields = {} add_tags = {"add/tag": {f"value{v}" for v in range(1000)}} @@ -232,7 +235,7 @@ def test_split_large_tags(): run_id="run_id", step=1, timestamp=timestamp, - fields=fields, + configs=fields, metrics=metrics, add_tags=add_tags, remove_tags=remove_tags,