
Commit 1175afc

Merge branch 'main' into feat/decouple-inference-preparation-and-execution

2 parents 7b2826d + 98a7dfc
15 files changed: +151 additions, -154 deletions
README.md

Lines changed: 4 additions & 4 deletions

@@ -31,25 +31,25 @@ description: |
 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  frequency: 54h
-
-lead_time: 120h
+  frequency: 60h

 runs:
   - forecaster:
       mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
       label: Stage D - N320 global grid with CERRA finetuning
+      steps: 0/120/6
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
+      steps: 0/120/6


 baselines:
   - baseline:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/126/6
+      steps: 0/120/6

 analysis:
   label: COSMO KENDA

config/forecasters-co1e.yaml

Lines changed: 2 additions & 3 deletions

@@ -8,13 +8,12 @@ dates:
   end: 2020-01-10T00:00
   frequency: 54h

-lead_time: 120h
-
 runs:
   - forecaster:
       mlflow_id: 2174c939c8844555a52843b71219d425
       label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11
       config: resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
+      steps: 0/120/6
       inference_resources:
         gpu: 4
         tasks: 4
@@ -24,7 +23,7 @@ baselines:
       baseline_id: COSMO-1E
       label: COSMO-1E
       root: /scratch/mch/bhendj/COSMO-1E
-      steps: 0/126/6
+      steps: 0/33/6

 analysis:
   label: COSMO KENDA

config/forecasters.yaml

Lines changed: 5 additions & 10 deletions

@@ -1,29 +1,24 @@
 # yaml-language-server: $schema=../workflow/tools/config.schema.json
 description: |
-  This is an experiment to do blabla.
+  Evaluate skill of COSMO-E emulator (M-1 forecaster).

 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  # end: 2020-03-30T00:00
-  frequency: 36h
-
-lead_time: 120h
+  frequency: 60h

 runs:
-  - forecaster:
-      mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
-      label: Stage D - N320 global grid with CERRA finetuning
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
+      steps: 0/120/6

 baselines:
   - baseline:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/126/6
+      steps: 0/120/6

 analysis:
   label: COSMO KENDA
@@ -38,7 +33,7 @@ locations:
 profile:
   executor: slurm
   global_resources:
-    gpus: 15
+    gpus: 16
   default_resources:
     slurm_partition: "postproc"
     cpus_per_task: 1

config/interpolators.yaml

Lines changed: 19 additions & 26 deletions

@@ -1,55 +1,48 @@
 # yaml-language-server: $schema=../workflow/tools/config.schema.json
 description: |
-  Stretched interpolator vs LAM interpolator.
+  Evaluate skill of SGM interpolator (M-2 interpolator).

 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  frequency: 54h
-
-lead_time: 120h
+  frequency: 60h

 runs:
   - interpolator:
-      mlflow_id: 9c18b90074214d769b8b383722fc5a06
-      label: LAM Interpolator (COSMO-E analysis)
-      steps: 0/121/1
-      config: resources/inference/configs/interpolator_from_test_data.yaml
+      mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
+      label: M-2 interpolator (KENDA)
+      steps: 0/120/1
+      config: resources/inference/configs/interpolator_from_test_data_stretched.yaml
       forecaster: null
       extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@fix/cutout-preprocessors
-        - torch-geometric==2.6.1
-        - anemoi-graphs==0.5.2
-  - interpolator:
-      mlflow_id: 9c18b90074214d769b8b383722fc5a06
-      label: LAM Interpolator (M-1 forecaster)
-      steps: 0/121/1
-      forecaster:
-        mlflow_id: d0846032fc7248a58b089cbe8fa4c511
-        config: resources/inference/configs/forecaster_with_global.yaml
-      extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@fix/cutout-preprocessors
+        - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
         - torch-geometric==2.6.1
         - anemoi-graphs==0.5.2
   - interpolator:
-      mlflow_id: 07c3d9698db14d859b78bb712a65bbbf
-      label: SGM Interpolator (M-1 forecaster)
-      steps: 0/121/1
+      mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
+      label: M-2 interpolator (M-1 forecaster)
+      steps: 0/120/1
       config: resources/inference/configs/interpolator_stretched.yaml
       forecaster:
         mlflow_id: d0846032fc7248a58b089cbe8fa4c511
         config: resources/inference/configs/forecaster_with_global.yaml
+        steps: 0/120/6
       extra_dependencies:
-        - git+https://github.com/ecmwf/anemoi-inference@fix/cutout-preprocessors
+        - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
         - torch-geometric==2.6.1
         - anemoi-graphs==0.5.2
+  - forecaster:
+      mlflow_id: d0846032fc7248a58b089cbe8fa4c511
+      label: M-1 forecaster
+      config: resources/inference/configs/forecaster_with_global.yaml
+      steps: 0/120/6

 baselines:
   - baseline:
       baseline_id: COSMO-E-1h
       label: COSMO-E
       root: /scratch/mch/bhendj/COSMO-E
-      steps: 0/121/1
+      steps: 0/120/1

 analysis:
   label: COSMO KENDA
@@ -65,7 +58,7 @@ locations:
 profile:
   executor: slurm
   global_resources:
-    gpus: 15
+    gpus: 16
   default_resources:
     slurm_partition: "postproc"
     cpus_per_task: 1

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -50,4 +50,4 @@ markers = [
 packages = [
     "src/evalml",
     "src/verification"
-]
+]

src/evalml/config.py

Lines changed: 31 additions & 7 deletions

@@ -1,7 +1,7 @@
 from pathlib import Path
 from typing import Dict, List, Any

-from pydantic import BaseModel, Field, RootModel, HttpUrl
+from pydantic import BaseModel, Field, RootModel, HttpUrl, field_validator

 PROJECT_ROOT = Path(__file__).parents[2]

@@ -70,9 +70,15 @@ class RunConfig(BaseModel):
         None,
         description="The label for the run that will be used in experiment results such as reports and figures.",
     )
-    steps: str | None = Field(
-        None,
-        description="Forecast steps to be used from interpolator, e.g. '0/126/6'.",
+    steps: str = Field(
+        ...,
+        description=(
+            "Forecast lead times in hours, formatted as 'start/end/step'. "
+            "The range includes the start lead time and continues with the given step "
+            "until reaching or exceeding the end lead time. "
+            "Example: '0/120/6' for lead times every 6 hours up to 120 h, "
+            "or '0/33/6' up to 30 h."
+        ),
     )
     extra_dependencies: List[str] = Field(
         default_factory=list,
@@ -86,6 +92,27 @@ class RunConfig(BaseModel):

     config: Dict[str, Any] | str

+    @field_validator("steps")
+    def validate_steps(cls, v: str) -> str:
+        if "/" not in v:
+            raise ValueError(
+                f"Steps must follow the format 'start/stop/step', got '{v}'"
+            )
+        parts = v.split("/")
+        if len(parts) != 3:
+            raise ValueError("Steps must be formatted as 'start/end/step'.")
+        try:
+            start, end, step = map(int, parts)
+        except ValueError:
+            raise ValueError("Start, end, and step must be integers.")
+        if start > end:
+            raise ValueError(
+                f"Start ({start}) must be less than or equal to end ({end})."
+            )
+        if step <= 0:
+            raise ValueError(f"Step ({step}) must be a positive integer.")
+        return v
+

 class ForecasterConfig(RunConfig):
     """Single training run stored in MLflow."""

@@ -240,9 +267,6 @@ class ConfigModel(BaseModel):
         description="Description of the experiment, e.g. 'Hindcast of the 2023 season.'",
     )
     dates: Dates | ExplicitDates
-    lead_time: str = Field(
-        ..., description="Forecast length, e.g. '120h'", pattern=r"^\d+[hmd]$"
-    )
     runs: List[ForecasterItem | InterpolatorItem] = Field(
         ...,
         description="Dictionary of runs to execute, with run IDs as keys and configurations as values.",
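
Note on the new per-run 'steps' schema: the sketch below (not part of this commit, helper name expand_steps is hypothetical) shows one way a 'start/end/step' string expands into lead times in hours, consistent with the examples in the field description ('0/120/6' up to 120 h, '0/33/6' up to 30 h).

def expand_steps(steps: str) -> list[int]:
    """Expand a 'start/end/step' string into lead times in hours.

    '0/120/6' -> 0, 6, ..., 120 (the end is included when hit exactly);
    '0/33/6'  -> 0, 6, ..., 30  (stops before overshooting the end).
    """
    start, end, step = map(int, steps.split("/"))
    return list(range(start, end + 1, step))


print(expand_steps("0/33/6"))  # [0, 6, 12, 18, 24, 30]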

workflow/Snakefile

Lines changed: 3 additions & 3 deletions

@@ -54,7 +54,7 @@ rule sandbox_all:
     input:
         expand(
             rules.create_inference_sandbox.output.sandbox,
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),


@@ -64,7 +64,7 @@ rule run_inference_all:
         expand(
             OUT_ROOT / "data/runs/{run_id}/{init_time}/raw",
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),


@@ -73,7 +73,7 @@ rule verif_metrics_all:
         expand(
             rules.verif_metrics.output,
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),

workflow/rules/common.smk

Lines changed: 15 additions & 2 deletions

@@ -67,12 +67,13 @@ REFTIMES = _reftimes()


 def collect_all_runs():
-    """Collect all runs defined in the configuration."""
+    """Collect all runs defined in the configuration, including secondary runs."""
     runs = {}
     for run_entry in copy.deepcopy(config["runs"]):
         model_type = next(iter(run_entry))
         run_config = run_entry[model_type]
         run_config["model_type"] = model_type
+        run_config["is_candidate"] = True
         run_id = run_config["mlflow_id"][0:9]

         if model_type == "interpolator":
@@ -83,6 +84,7 @@ def collect_all_runs():
             # Ensure a proper 'forecaster' entry exists with model_type
             fore_cfg = copy.deepcopy(run_config["forecaster"])
             fore_cfg["model_type"] = "forecaster"
+            fore_cfg["is_candidate"] = False  # exclude from outputs
             runs[tail_id] = fore_cfg
             run_id = f"{run_id}-{tail_id}"

@@ -91,6 +93,16 @@ def collect_all_runs():
     return runs


+def collect_all_candidates():
+    """Collect participating runs ('candidates') only."""
+    runs = collect_all_runs()
+    candidates = {}
+    for run_id, run_config in runs.items():
+        if run_config.get("is_candidate", False):
+            candidates[run_id] = run_config
+    return candidates
+
+
 def collect_all_baselines():
     """Collect all baselines defined in the configuration."""
     baselines = {}
@@ -107,7 +119,8 @@ def collect_experiment_participants():
     for base in BASELINE_CONFIGS.keys():
         participants[base] = OUT_ROOT / f"data/baselines/{base}/verif_aggregated.nc"
     for exp in RUN_CONFIGS.keys():
-        participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc"
+        if RUN_CONFIGS[exp].get("is_candidate", False):
+            participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc"
     return participants
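
For context on the new 'is_candidate' flag: a standalone illustration with toy data (not part of this commit) of how collect_all_candidates() keeps participating runs and drops a secondary forecaster that exists only to drive an interpolator. The run IDs are the first 9 characters of the mlflow_id, as in the workflow.

# Toy runs dict shaped like the output of collect_all_runs().
runs = {
    "8d1e0410c": {"model_type": "interpolator", "is_candidate": True},
    "d0846032f": {"model_type": "forecaster", "is_candidate": False},  # secondary, drives the interpolator
    "2174c939c": {"model_type": "forecaster", "is_candidate": True},
}

# Same filter as collect_all_candidates(): only candidates reach the sandbox,
# inference, and verification targets and the experiment participants.
candidates = {rid: cfg for rid, cfg in runs.items() if cfg.get("is_candidate", False)}
print(sorted(candidates))  # ['2174c939c', '8d1e0410c']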

workflow/rules/data.smk

Lines changed: 4 additions & 4 deletions

@@ -18,15 +18,15 @@ if "extract_cosmoe" in config.get("include-optional-rules", []):
             runtime="24h",
         params:
             year_postfix=lambda wc: f"FCST{wc.year}",
-            lead_time="0/126/6",
+            steps="0/120/6",
         log:
             OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log",
         shell:
             """
             python workflow/scripts/extract_baseline_fct.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
-                --lead_time {params.lead_time} \
+                --steps {params.steps} \
                 > {log} 2>&1
             """

@@ -45,14 +45,14 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []):
             runtime="24h",
         params:
             year_postfix=lambda wc: f"FCST{wc.year}",
-            lead_time="0/34/1",
+            steps="0/33/1",
         log:
             OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log",
         shell:
             """
             python workflow/scripts/extract_baseline_fct.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
-                --lead_time {params.lead_time} \
+                --steps {params.steps} \
                 > {log} 2>&1
             """

workflow/rules/inference.smk

Lines changed: 8 additions & 2 deletions

@@ -135,6 +135,12 @@ def get_resource(wc, field: str, default):
     return getattr(rc["inference_resources"], field) or default


+def get_leadtime(wc):
+    """Get the lead time from the run config."""
+    start, end, step = RUN_CONFIGS[wc.run_id]["steps"].split("/")
+    return f"{end}h"
+
+
 rule prepare_inference_forecaster:
     localrule: True
     input:
@@ -151,7 +157,7 @@ rule prepare_inference_forecaster:
         checkpoints_path=parse_input(
             input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path"
         ),
-        lead_time=config["lead_time"],
+        lead_time=lambda wc: get_leadtime(wc),
         output_root=(OUT_ROOT / "data").resolve(),
         resources_root=Path("resources/inference").resolve(),
         reftime_to_iso=lambda wc: datetime.strptime(
@@ -235,7 +241,7 @@ rule prepare_inference_interpolator:
         checkpoints_path=parse_input(
             input.pyproject, parse_toml, key="tool.anemoi.checkpoints_path"
         ),
-        lead_time=config["lead_time"],
+        lead_time=lambda wc: get_leadtime(wc),
         output_root=(OUT_ROOT / "data").resolve(),
         resources_root=Path("resources/inference").resolve(),
         reftime_to_iso=lambda wc: datetime.strptime(
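
A quick sketch of the new lead-time derivation, rewritten standalone (without the Snakemake wildcards object and the RUN_CONFIGS lookup); it mirrors get_leadtime above, which takes the end of the run's 'steps' range as the inference lead time instead of the removed global config["lead_time"].

def leadtime_from_steps(steps: str) -> str:
    """Derive the inference lead time from a 'start/end/step' string."""
    _start, end, _step = steps.split("/")
    return f"{end}h"


assert leadtime_from_steps("0/120/6") == "120h"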
