Skip to content

Commit cbef085

Browse files
authored
[DRAFT][590] Rename metrics file (ecmwf#601)
* Implemented backward-compatible function to read and write `{RUN-ID}_train_metrics.json` (new) or `metrics.json` (old)
1 parent 3816abd commit cbef085

File tree

4 files changed

+230
-9
lines changed

4 files changed

+230
-9
lines changed

integration_tests/small1_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import weathergen.common.io as io
1919
import weathergen.utils.config as config
2020
from weathergen.run_train import inference_from_args, train_with_args
21+
from weathergen.utils.metrics import get_train_metrics_path
2122

2223
logger = logging.getLogger(__name__)
2324

@@ -100,7 +101,7 @@ def evaluate_results(run_id):
100101

101102
def load_metrics(run_id):
102103
"""Helper function to load metrics"""
103-
file_path = f"{weathergen_home}/results/{run_id}/metrics.json"
104+
file_path = get_train_metrics_path(base_path=weathergen_home / "results", run_id=run_id)
104105
if not os.path.exists(file_path):
105106
raise FileNotFoundError(f"Metrics file not found for run_id: {run_id}")
106107
with open(file_path) as f:
@@ -110,7 +111,7 @@ def load_metrics(run_id):
110111

111112
def assert_missing_metrics_file(run_id):
112113
"""Test that a missing metrics file raises FileNotFoundError."""
113-
file_path = f"{weathergen_home}/results/{run_id}/metrics.json"
114+
file_path = get_train_metrics_path(base_path=weathergen_home / "results", run_id=run_id)
114115
assert os.path.exists(file_path), f"Metrics file does not exist for run_id: {run_id}"
115116
metrics = load_metrics(run_id)
116117
logger.info(f"Loaded metrics for run_id: {run_id}: {metrics}")

src/weathergen/utils/metrics.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,16 @@ def read_metrics_file(f: str | Path) -> pl.DataFrame:
4949
pl.when(pl.col(n).is_not_nan()).then(df1[n]).otherwise(df2[n]).alias(n)
5050
)
5151
return df1
52+
53+
54+
def get_train_metrics_path(base_path: Path, run_id: str) -> Path:
    """
    Locate the training metrics file for *run_id* under *base_path*.

    Provides backwards compatibility after `results/{RUN-ID}/metrics.json` was renamed
    to `results/{RUN-ID}/{RUN-ID}_train_metrics.json` to disambiguate `metrics.json`.
    See https://github.com/ecmwf/WeatherGenerator/issues/590 for details.

    :param base_path: directory that contains the per-run result folders
    :param run_id: identifier of the run whose metrics file is wanted
    :return: the legacy `metrics.json` path if that file exists, otherwise the
        new-style `{run_id}_train_metrics.json` path
    """
    run_dir = base_path / run_id
    legacy_path = run_dir / "metrics.json"
    # Prefer the legacy location when present so existing runs keep working.
    return legacy_path if legacy_path.exists() else run_dir / f"{run_id}_train_metrics.json"

src/weathergen/utils/train_logger.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import polars as pl
2222

2323
import weathergen.utils.config as config
24-
from weathergen.utils.metrics import read_metrics_file
24+
from weathergen.utils.metrics import get_train_metrics_path, read_metrics_file
2525

2626
_weathergen_timestamp = "weathergen.timestamp"
2727
_weathergen_reltime = "weathergen.reltime"
@@ -66,7 +66,8 @@ def log_metrics(self, stage: Stage, metrics: dict[str, float]) -> None:
6666
# TODO: performance: we repeatedly open the file for each call. Better for multiprocessing
6767
# but we can probably do better and rely for example on the logging module.
6868

69-
with open(self.path_run / "metrics.json", "ab") as f:
69+
metrics_path = get_train_metrics_path(base_path=Path("results"), run_id=self.cf.run_id)
70+
with open(metrics_path, "ab") as f:
7071
s = json.dumps(clean_metrics) + "\n"
7172
f.write(s.encode("utf-8"))
7273

@@ -157,7 +158,12 @@ def read(run_id, model_path: str, epoch=-1):
157158

158159
# define cols for training
159160
cols_train = ["dtime", "samples", "mse", "lr"]
160-
cols1 = [_weathergen_timestamp, "num_samples", "loss_avg_0_mean", "learning_rate"]
161+
cols1 = [
162+
_weathergen_timestamp,
163+
"num_samples",
164+
"loss_avg_0_mean",
165+
"learning_rate",
166+
]
161167
for si in cf.streams:
162168
for _j, lf in enumerate(cf.loss_fcts):
163169
cols1 += [_key_loss(si["name"], lf[0])]
@@ -178,7 +184,13 @@ def read(run_id, model_path: str, epoch=-1):
178184
with open(fname_log_train, "rb") as f:
179185
log_train = np.loadtxt(f, delimiter=",")
180186
log_train = log_train.reshape((log_train.shape[0] // len(cols_train), len(cols_train)))
181-
except (TypeError, AttributeError, IndexError, ZeroDivisionError, ValueError) as e:
187+
except (
188+
TypeError,
189+
AttributeError,
190+
IndexError,
191+
ZeroDivisionError,
192+
ValueError,
193+
) as e:
182194
_logger.warning(
183195
(
184196
f"Warning: no training data loaded for run_id={run_id}",
@@ -230,7 +242,13 @@ def read(run_id, model_path: str, epoch=-1):
230242
with open(fname_log_val, "rb") as f:
231243
log_val = np.loadtxt(f, delimiter=",")
232244
log_val = log_val.reshape((log_val.shape[0] // len(cols_val), len(cols_val)))
233-
except (TypeError, AttributeError, IndexError, ZeroDivisionError, ValueError) as e:
245+
except (
246+
TypeError,
247+
AttributeError,
248+
IndexError,
249+
ZeroDivisionError,
250+
ValueError,
251+
) as e:
234252
_logger.warning(
235253
(
236254
f"Warning: no validation data loaded for run_id={run_id}",
@@ -265,7 +283,13 @@ def read(run_id, model_path: str, epoch=-1):
265283
with open(fname_perf_val, "rb") as f:
266284
log_perf = np.loadtxt(f, delimiter=",")
267285
log_perf = log_perf.reshape((log_perf.shape[0] // len(cols_perf), len(cols_perf)))
268-
except (TypeError, AttributeError, IndexError, ZeroDivisionError, ValueError) as e:
286+
except (
287+
TypeError,
288+
AttributeError,
289+
IndexError,
290+
ZeroDivisionError,
291+
ValueError,
292+
) as e:
269293
_logger.warning(
270294
(
271295
f"Warning: no validation data loaded for run_id={run_id}",
@@ -341,8 +365,9 @@ def read_metrics(
341365
run_id = cf.run_id
342366
assert run_id, "run_id must be provided"
343367

368+
metrics_path = get_train_metrics_path(base_path=results_path, run_id=run_id)
344369
# TODO: this should be a config option
345-
df = read_metrics_file(results_path / run_id / "metrics.json")
370+
df = read_metrics_file(metrics_path)
346371
if stage is not None:
347372
df = df.filter(pl.col("stage") == stage)
348373
df = df.drop("stage")

0 commit comments

Comments
 (0)