neptune-ai · michalsosn · Mar 10, 2025 · Mar 4, 2025 · Mar 5, 2025 · Mar 5, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -33,5 +33,6 @@ repos:
           - neptune-api==0.11.0
           - more-itertools
           - backoff
+          - types-click
 default_language_version:
   python: python3
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,6 +15,8 @@ neptune-api = "^0.11.0"
 more-itertools = "^10.0.0"
 psutil = "^5.0.0"
 backoff = "^2.0.0"
+click = ">=7.0"
+tqdm = "^4.21.0"
 
 [tool.poetry]
 name = "neptune-scale"
@@ -95,3 +97,6 @@ show_error_codes = "True"
 
 [tool.pytest.ini_options]
 addopts = "--doctest-modules -n auto"
+
+[tool.poetry.scripts]
+neptune = "neptune_scale.cli.commands:main"
diff --git a/src/neptune_scale/api/run.py b/src/neptune_scale/api/run.py
@@ -9,7 +9,6 @@
 from types import TracebackType
 
 from neptune_scale.sync.operations_repository import (
-    DB_VERSION,
     Metadata,
     OperationsRepository,
 )
@@ -45,7 +44,6 @@
 from neptune_scale.exceptions import (
     NeptuneApiTokenNotProvided,
     NeptuneConflictingDataInLocalStorage,
-    NeptuneLocalStorageInUnsupportedVersion,
     NeptuneProjectNotProvided,
 )
 from neptune_scale.net.serialization import (
@@ -184,12 +182,6 @@ def __init__(
         assert project is not None  # mypy
         input_project: str = project
 
-        api_token = api_token or os.environ.get(API_TOKEN_ENV_NAME)
-        if api_token is None:
-            raise NeptuneApiTokenNotProvided()
-        assert api_token is not None  # mypy
-        input_api_token: str = api_token
-
         mode = mode or os.environ.get(MODE_ENV_NAME, "async")  # type: ignore
 
         verify_non_empty("run_id", run_id)
@@ -240,6 +232,12 @@ def __init__(
         if mode == "async":
             assert self._sequence_tracker is not None
 
+            api_token = api_token or os.environ.get(API_TOKEN_ENV_NAME)
+            if api_token is None:
+                raise NeptuneApiTokenNotProvided()
+            assert api_token is not None  # mypy
+            input_api_token: str = api_token
+
             self._errors_queue: Optional[ErrorsQueue] = ErrorsQueue()
             self._errors_monitor: Optional[ErrorsMonitor] = ErrorsMonitor(
                 errors_queue=self._errors_queue,
@@ -762,9 +760,6 @@ def _validate_existing_db(
     fork_run_id: Optional[str],
     fork_step: Optional[float],
 ) -> None:
-    if existing_metadata.version != DB_VERSION:
-        raise NeptuneLocalStorageInUnsupportedVersion()
-
     if existing_metadata.project != project or existing_metadata.run_id != run_id:
         # should never happen because we use project and run_id to create the repository path
         raise NeptuneConflictingDataInLocalStorage()

diff --git a/src/neptune_scale/cli/__init__.py b/src/neptune_scale/cli/__init__.py
diff --git a/src/neptune_scale/cli/commands.py b/src/neptune_scale/cli/commands.py
@@ -0,0 +1,56 @@
+#
+# Copyright (c) 2022, Neptune Labs Sp. z o.o.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+__all__ = ["sync"]
+
+import os
+from pathlib import Path
+from typing import Optional
+
+import click
+
+from neptune_scale.cli.sync import sync_all
+from neptune_scale.exceptions import NeptuneApiTokenNotProvided
+from neptune_scale.util.envs import API_TOKEN_ENV_NAME
+
+
+@click.group()
+def main() -> None:  # root command group; `sync` is registered on it via @main.command()
+    pass
+
+
+@main.command()
+@click.argument(
+    "run_log_file",
+    type=click.Path(exists=True, dir_okay=False, resolve_path=True),
+    metavar="<run-log-file>",
+)
+@click.option(
+    "--api-token",
+    "api_token",
+    multiple=False,
+    default=os.environ.get(API_TOKEN_ENV_NAME),
+    metavar="<api-token>",
+    help="API token for authentication. Overrides NEPTUNE_API_TOKEN environment variable",
+)
+def sync(run_log_file: str, api_token: Optional[str]) -> None:
+    """Synchronize the operations stored in <run-log-file>."""
+    if api_token is None:
+        raise NeptuneApiTokenNotProvided()
+
+    # click.Path hands over a plain str (path_type only exists in click >= 8.0, while
+    # this package allows click >= 7.0), but sync_all calls Path methods on its argument.
+    sync_all(Path(run_log_file), api_token)
diff --git a/src/neptune_scale/cli/sync.py b/src/neptune_scale/cli/sync.py
@@ -0,0 +1,144 @@
+#
+# Copyright (c) 2022, Neptune Labs Sp. z o.o.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+__all__ = ["sync_all"]
+
+from pathlib import Path
+from typing import Optional
+
+from tqdm import tqdm
+
+from neptune_scale.sync import sync_process
+from neptune_scale.sync.errors_tracking import (
+    ErrorsMonitor,
+    ErrorsQueue,
+)
+from neptune_scale.sync.operations_repository import (
+    OperationsRepository,
+    SequenceId,
+)
+from neptune_scale.sync.sync_process import SyncProcess
+from neptune_scale.util import (
+    SharedFloat,
+    SharedInt,
+    get_logger,
+)
+
+logger = get_logger()
+
+
+def sync_all(run_log_file: Path, api_token: str) -> None:
+    """Run a SyncRunner over `run_log_file`: start it, wait for it, and always stop it.
+
+    Raises FileNotFoundError when `run_log_file` does not exist.
+    """
+    if not run_log_file.exists():
+        raise FileNotFoundError(f"Run log file {run_log_file} does not exist")
+    resolved_log_file = run_log_file.resolve()
+
+    sync_runner = SyncRunner(api_token=api_token, run_log_file=resolved_log_file)
+    try:
+        sync_runner.start()
+        sync_runner.wait()
+    finally:
+        sync_runner.stop()  # always release the repository, queue and any started workers
+
+
+class SyncRunner:
+    """Drives a SyncProcess over one run log file and tracks acknowledged operations."""
+
+    def __init__(self, api_token: str, run_log_file: Path) -> None:
+        self._api_token: str = api_token
+        self._run_log_file: Path = run_log_file
+        self._operations_repository: OperationsRepository = OperationsRepository(db_path=run_log_file)
+        self._process_link = sync_process.ProcessLink()
+        self._errors_queue: ErrorsQueue = ErrorsQueue()
+        self._last_queued_seq = SharedInt(-1)
+        self._last_ack_seq = SharedInt(-1)
+        self._last_ack_timestamp = SharedFloat(-1)
+        self._log_seq_id_range: Optional[tuple[SequenceId, SequenceId]] = None
+        self._sync_process: Optional[SyncProcess] = None
+        self._errors_monitor: Optional[ErrorsMonitor] = None
+
+    def start(self) -> None:
+        """Start the sync process and errors monitor; no-op if the log has no operations or metadata."""
+        self._log_seq_id_range = self._operations_repository.get_sequence_id_range()
+        if self._log_seq_id_range is None:
+            logger.info("No operations to process")
+            return
+
+        metadata = self._operations_repository.get_metadata()
+        if metadata is None:
+            logger.error("No run metadata found in log")
+            return
+
+        self._sync_process = sync_process.SyncProcess(
+            operations_repository_path=self._run_log_file,
+            errors_queue=self._errors_queue,
+            process_link=self._process_link,
+            api_token=self._api_token,
+            project=metadata.project,
+            family=metadata.run_id,
+            last_queued_seq=self._last_queued_seq,
+            last_ack_seq=self._last_ack_seq,
+            last_ack_timestamp=self._last_ack_timestamp,
+        )
+        self._errors_monitor = ErrorsMonitor(errors_queue=self._errors_queue)
+
+        self._sync_process.start()
+        self._process_link.start()
+        self._errors_monitor.start()
+
+    def wait(self, progress_bar_enabled: bool = True, wait_time: float = 0.1) -> None:
+        """Block until the last logged operation is acknowledged or the user interrupts."""
+        # start() may bail out (empty log or missing metadata) without launching the sync
+        # process; nothing would ever be acknowledged then, so polling would spin forever.
+        if self._log_seq_id_range is None or self._sync_process is None:
+            return
+
+        total_count = self._log_seq_id_range[1] - self._log_seq_id_range[0] + 1
+        with tqdm(
+            desc="Syncing operations", total=total_count, unit="op", disable=not progress_bar_enabled
+        ) as progress_bar:
+            while True:
+                try:
+                    with self._last_ack_seq:
+                        self._last_ack_seq.wait(timeout=wait_time)
+                        last_ack_seq_id = self._last_ack_seq.value
+
+                    if last_ack_seq_id != -1:
+                        acked_count = last_ack_seq_id - self._log_seq_id_range[0] + 1
+                        progress_bar.update(acked_count - progress_bar.n)
+
+                    if last_ack_seq_id >= self._log_seq_id_range[1]:
+                        break
+                except KeyboardInterrupt:
+                    logger.warning("Waiting interrupted by user")
+                    return
+
+    def stop(self) -> None:
+        """Shut down whatever start() launched and close the repository and errors queue."""
+        if self._errors_monitor is not None:
+            self._errors_monitor.interrupt()
+            self._errors_monitor.join()
+
+        if self._sync_process is not None:
+            self._sync_process.terminate()
+            self._sync_process.join()
+            self._process_link.stop()
+
+        self._operations_repository.close()
+        self._errors_queue.close()
diff --git a/src/neptune_scale/sync/operations_repository.py b/src/neptune_scale/sync/operations_repository.py
@@ -24,6 +24,7 @@
 from neptune_api.proto.neptune_pb.ingest.v1.common_pb2 import Run as CreateRun
 from neptune_api.proto.neptune_pb.ingest.v1.common_pb2 import UpdateRunSnapshot
 
+from neptune_scale.exceptions import NeptuneLocalStorageInUnsupportedVersion
 from neptune_scale.util import get_logger
 
 logger = get_logger()
@@ -261,10 +262,33 @@ def get_metadata(self) -> Optional[Metadata]:
 
             version, project, run_id, parent_run_id, fork_step = row
 
+            if version != DB_VERSION:
+                raise NeptuneLocalStorageInUnsupportedVersion()
+
             return Metadata(
                 version=version, project=project, run_id=run_id, parent_run_id=parent_run_id, fork_step=fork_step
             )
 
+    def get_sequence_id_range(self) -> Optional[tuple[SequenceId, SequenceId]]:
+        """Return the (min, max) sequence ids stored in run_operations, or None when there are none."""
+        with self._get_connection() as conn:  # type: ignore
+            cursor = conn.cursor()
+
+            cursor.execute(
+                """
+                SELECT MIN(sequence_id), MAX(sequence_id)
+                FROM run_operations
+                """
+            )
+            bounds = cursor.fetchone()
+
+            # No row, or MIN/MAX evaluated to None (presumably an empty table).
+            if not bounds or bounds[0] is None or bounds[1] is None:
+                return None
+
+            lowest, highest = bounds
+            return SequenceId(lowest), SequenceId(highest)
+
     def close(self) -> None:
         with self._lock:
             if self._connection is not None:

diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
@@ -1,5 +1,6 @@
 import logging
 import os
+import random
 import uuid
 from datetime import (
     datetime,
@@ -83,3 +84,20 @@ def run(project, run_init_kwargs):
 def ro_run(project, run, run_init_kwargs):
     """ReadOnlyRun pointing to the same run as the neptune_scale.Run"""
     return ReadOnlyRun(read_only_project=project, custom_id=run_init_kwargs["run_id"])
+
+
+def unique_path(prefix):  # prefix + UTC timestamp (second precision) + last 4 chars of a uuid4
+    return "__".join((f"{prefix}", datetime.now(timezone.utc).isoformat("-", "seconds"), str(uuid.uuid4())[-4:]))
+
+
+def random_series(length=10, start_step=0):
+    """Return a 2-tuple of step and value lists, both of length `length`; steps begin at index `start_step`."""
+    assert length > 0
+    assert start_step >= 0
+
+    j = random.random()  # one random offset per call, shared by the steps and the values
+    # Round to 0 digits to avoid floating point errors. The range must span `length`
+    # indices from `start_step`; range(start_step, length) returned too few steps.
+    steps = [round((j + x) ** 2.0, 0) for x in range(start_step, start_step + length)]
+    values = [round((j + x) ** 3.0, 0) for x in range(length)]
+    return steps, values
diff --git a/tests/e2e/test_log_and_fetch.py b/tests/e2e/test_log_and_fetch.py
@@ -1,6 +1,5 @@
 import math
 import os
-import random
 import threading
 import time
 import uuid
@@ -15,33 +14,21 @@
 
 from neptune_scale.api.run import Run
 
+from .conftest import (
+    random_series,
+    unique_path,
+)
+
 NEPTUNE_PROJECT = os.getenv("NEPTUNE_E2E_PROJECT")
 SYNC_TIMEOUT = 30
 
 
-def unique_path(prefix):
-    return f"{prefix}__{datetime.now(timezone.utc).isoformat('-', 'seconds')}__{str(uuid.uuid4())[-4:]}"
-
-
 def refresh(ro_run: ReadOnlyRun):
     """Create a new ReadOnlyRun instance with the same project and custom_id,
     which is basically a "refresh" operation"""
     return ReadOnlyRun(read_only_project=ro_run.project, custom_id=ro_run["sys/custom_run_id"].fetch())
 
 
-def random_series(length=10, start_step=0):
-    """Return a 2-tuple of step and value lists, both of length `length`"""
-    assert length > 0
-    assert start_step >= 0
-
-    j = random.random()
-    # Round to 0 to avoid floating point errors
-    steps = [round((j + x) ** 2.0, 0) for x in range(start_step, length)]
-    values = [round((j + x) ** 3.0, 0) for x in range(len(steps))]
-
-    return steps, values
-
-
 def test_atoms(run, ro_run):
     """Set atoms to a value, make sure it's equal when fetched"""