From 8a9e4bd9099d11ebea9a01e38a7d9dd344bb3ea0 Mon Sep 17 00:00:00 2001 From: Krzysztof Godlewski Date: Mon, 4 Nov 2024 16:07:24 +0100 Subject: [PATCH 1/3] Rename a field in `MetadataSplitter` This is to be more consistent with other spots --- src/neptune_scale/__init__.py | 2 +- src/neptune_scale/core/metadata_splitter.py | 6 +++--- tests/unit/test_metadata_splitter.py | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/neptune_scale/__init__.py b/src/neptune_scale/__init__.py index d6c045c6..996afb27 100644 --- a/src/neptune_scale/__init__.py +++ b/src/neptune_scale/__init__.py @@ -568,7 +568,7 @@ def log( run_id=self._run_id, step=step, timestamp=timestamp, - fields=configs, + configs=configs, metrics=metrics, add_tags=tags_add, remove_tags=tags_remove, diff --git a/src/neptune_scale/core/metadata_splitter.py b/src/neptune_scale/core/metadata_splitter.py index c2018926..87fb681e 100644 --- a/src/neptune_scale/core/metadata_splitter.py +++ b/src/neptune_scale/core/metadata_splitter.py @@ -43,7 +43,7 @@ def __init__( run_id: str, step: Optional[Union[int, float]], timestamp: datetime, - fields: Dict[str, Union[float, bool, int, str, datetime]], + configs: Dict[str, Union[float, bool, int, str, datetime]], metrics: Dict[str, float], add_tags: Dict[str, Union[List[str], Set[str]]], remove_tags: Dict[str, Union[List[str], Set[str]]], @@ -53,7 +53,7 @@ def __init__( self._timestamp = datetime_to_proto(timestamp) self._project = project self._run_id = run_id - self._fields = peekable(fields.items()) + self._configs = peekable(configs.items()) self._metrics = peekable(starmap(lambda k, v: (k, float(v)), metrics.items())) self._add_tags = peekable(add_tags.items()) self._remove_tags = peekable(remove_tags.items()) @@ -84,7 +84,7 @@ def __next__(self) -> Tuple[RunOperation, int]: size = update.ByteSize() size = self.populate( - assets=self._fields, + assets=self._configs, update_producer=lambda key, value: update.assign[key].MergeFrom(value), size=size, ) diff --git a/tests/unit/test_metadata_splitter.py b/tests/unit/test_metadata_splitter.py index 6fe5691f..67e941fe 100644 --- a/tests/unit/test_metadata_splitter.py +++ b/tests/unit/test_metadata_splitter.py @@ -24,7 +24,7 @@ def test_empty(): run_id="run_id", step=1, timestamp=datetime.now(), - fields={}, + configs={}, metrics={}, add_tags={}, remove_tags={}, @@ -51,7 +51,7 @@ def test_fields(): run_id="run_id", step=1, timestamp=datetime.now(), - fields={ + configs={ "some/string": "value", "some/int": 2501, "some/float": 3.14, @@ -95,7 +95,7 @@ def test_metrics(): run_id="run_id", step=1, timestamp=datetime.now(), - fields={}, + configs={}, metrics={ "some/metric": 3.14, }, @@ -129,7 +129,7 @@ def test_tags(): run_id="run_id", step=1, timestamp=datetime.now(), - fields={}, + configs={}, metrics={}, add_tags={ "some/tags": {"tag1", "tag2"}, @@ -186,7 +186,7 @@ def test_splitting(): run_id="run_id", step=1, timestamp=timestamp, - fields=fields, + configs=fields, metrics=metrics, add_tags=add_tags, remove_tags=remove_tags, @@ -232,7 +232,7 @@ def test_split_large_tags(): run_id="run_id", step=1, timestamp=timestamp, - fields=fields, + configs=fields, metrics=metrics, add_tags=add_tags, remove_tags=remove_tags, From 10c2d59223b6f45bd89952f99a38fa351aa11a55 Mon Sep 17 00:00:00 2001 From: Krzysztof Godlewski Date: Mon, 4 Nov 2024 16:12:49 +0100 Subject: [PATCH 2/3] Assume local timezone for naive `datetime` Any `datetime` object passed to `log*` functions will be converted to UTC prior to logging. If `tzinfo` is not provided, it is assumed to be in local time. --- README.md | 4 +++- src/neptune_scale/__init__.py | 17 +++++++++++++---- src/neptune_scale/core/serialization.py | 4 +++- src/neptune_scale/core/util.py | 13 +++++++++++++ 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 118b2152..2a0dd993 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,8 @@ You can log configurations or other single values. Pass the metadata as a dictio For example, `{"parameters/learning_rate": 0.001}`. In the field path, each forward slash `/` nests the field under a namespace. Use namespaces to structure the metadata into meaningful categories. +Any `datetime` values that don't have the `tzinfo` attribute set are assumed to be in the local timezone. + __Parameters__ | Name | Type | Default | Description | @@ -265,7 +267,7 @@ __Parameters__ |-------------|------------------------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `data` | `Dict[str, Union[float, int]]` | `None` | Dictionary of metrics to log. Each metric value is associated with a step. To log multiple metrics at once, pass multiple key-value pairs. | | `step` | `Union[float, int]` | `None` | Index of the log entry. Must be increasing.
**Tip:** Using float rather than int values can be useful, for example, when logging substeps in a batch. | -| `timestamp` | `datetime`, optional | `None` | Time of logging the metadata. | +| `timestamp` | `datetime`, optional | `None` | Time of logging the metadata. If not provided, the current time is used. If provided, and `timestamp.tzinfo` is not set, the time is assumed to be in the local timezone. | __Examples__ diff --git a/src/neptune_scale/__init__.py b/src/neptune_scale/__init__.py index 996afb27..e4796645 100644 --- a/src/neptune_scale/__init__.py +++ b/src/neptune_scale/__init__.py @@ -14,7 +14,10 @@ import threading import time from contextlib import AbstractContextManager -from datetime import datetime +from datetime import ( + datetime, + timezone, +) from multiprocessing.sharedctypes import Synchronized from multiprocessing.synchronize import Condition as ConditionT from typing import ( @@ -52,7 +55,10 @@ datetime_to_proto, make_step, ) -from neptune_scale.core.util import safe_signal_name +from neptune_scale.core.util import ( + ensure_utc, + safe_signal_name, +) from neptune_scale.core.validation import ( verify_collection_type, verify_max_length, @@ -429,7 +435,8 @@ def log_metrics( To log multiple metrics at once, pass multiple key-value pairs. step: Index of the log entry. Must be increasing. Tip: Using float rather than int values can be useful, for example, when logging substeps in a batch. - timestamp (optional): Time of logging the metadata. + timestamp (optional): Time of logging the metadata. If not provided, the current time is used. If provided, + and `timestamp.tzinfo` is not set, the time is assumed to be in the local timezone. Examples: @@ -462,6 +469,8 @@ def log_configs(self, data: Optional[Dict[str, Union[float, bool, int, str, date data: Dictionary of configs or other values to log. Available types: float, integer, Boolean, string, and datetime. + Any `datetime` values that don't have the `tzinfo` attribute set are assumed to be in the local timezone. + Example: ``` from neptune_scale import Run @@ -540,7 +549,7 @@ def log( verify_type("tags_add", tags_add, (dict, type(None))) verify_type("tags_remove", tags_remove, (dict, type(None))) - timestamp = datetime.now() if timestamp is None else timestamp + timestamp = datetime.now(tz=timezone.utc) if timestamp is None else ensure_utc(timestamp) configs = {} if configs is None else configs metrics = {} if metrics is None else metrics tags_add = {} if tags_add is None else tags_add diff --git a/src/neptune_scale/core/serialization.py b/src/neptune_scale/core/serialization.py index 13babac7..cc8336ed 100644 --- a/src/neptune_scale/core/serialization.py +++ b/src/neptune_scale/core/serialization.py @@ -21,6 +21,8 @@ Value, ) +from .util import ensure_utc + def make_value(value: Union[Value, float, str, int, bool, datetime, List[str], Set[str]]) -> Value: if isinstance(value, Value): @@ -43,7 +45,7 @@ def make_value(value: Union[Value, float, str, int, bool, datetime, List[str], S def datetime_to_proto(dt: datetime) -> Timestamp: - dt_ts = dt.timestamp() + dt_ts = ensure_utc(dt).timestamp() return Timestamp(seconds=int(dt_ts), nanos=int((dt_ts % 1) * 1e9)) diff --git a/src/neptune_scale/core/util.py b/src/neptune_scale/core/util.py index 60fe4b0b..34ffc92f 100644 --- a/src/neptune_scale/core/util.py +++ b/src/neptune_scale/core/util.py @@ -1,4 +1,8 @@ import signal +from datetime import ( + datetime, + timezone, +) def safe_signal_name(signum: int) -> str: @@ -8,3 +12,12 @@ def safe_signal_name(signum: int) -> str: signame = str(signum) return signame + + +def ensure_utc(dt: datetime) -> datetime: + """If `dt` has no TZ info, assume it's local time, and convert to UTC. Otherwise return as is.""" + + if dt.tzinfo is None: + return dt.astimezone(timezone.utc) + + return dt From 106475d6c3ab508d24b9b33f54621cd0d0af5e4a Mon Sep 17 00:00:00 2001 From: Krzysztof Godlewski Date: Mon, 4 Nov 2024 16:34:00 +0100 Subject: [PATCH 3/3] Use explicit UTC timezone in MetadataSplitter tests --- tests/unit/test_metadata_splitter.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_metadata_splitter.py b/tests/unit/test_metadata_splitter.py index 67e941fe..86a16dd1 100644 --- a/tests/unit/test_metadata_splitter.py +++ b/tests/unit/test_metadata_splitter.py @@ -1,4 +1,7 @@ -from datetime import datetime +from datetime import ( + datetime, + timezone, +) from freezegun import freeze_time from google.protobuf.timestamp_pb2 import Timestamp @@ -23,7 +26,7 @@ def test_empty(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), + timestamp=datetime.now(timezone.utc), configs={}, metrics={}, add_tags={}, @@ -50,13 +53,13 @@ def test_fields(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), + timestamp=datetime.now(timezone.utc), configs={ "some/string": "value", "some/int": 2501, "some/float": 3.14, "some/bool": True, - "some/datetime": datetime.now(), + "some/datetime": datetime.now(timezone.utc), "some/tags": {"tag1", "tag2"}, }, metrics={}, @@ -94,7 +97,7 @@ def test_metrics(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), + timestamp=datetime.now(timezone.utc), configs={}, metrics={ "some/metric": 3.14, @@ -128,7 +131,7 @@ def test_tags(): project="workspace/project", run_id="run_id", step=1, - timestamp=datetime.now(), + timestamp=datetime.now(timezone.utc), configs={}, metrics={}, add_tags={ @@ -174,7 +177,7 @@ def test_tags(): def test_splitting(): # given max_size = 1024 - timestamp = datetime.now() + timestamp = datetime.now(timezone.utc) metrics = {f"metric{v}": 7 / 9.0 * v for v in range(1000)} fields = {f"field{v}": v for v in range(1000)} add_tags = {f"add/tag{v}": {f"value{v}"} for v in range(1000)} @@ -220,7 +223,7 @@ def test_splitting(): def test_split_large_tags(): # given max_size = 1024 - timestamp = datetime.now() + timestamp = datetime.now(timezone.utc) metrics = {} fields = {} add_tags = {"add/tag": {f"value{v}" for v in range(1000)}}