Merge branch 'main' into plotting

dnerini · dnerini · commit 61b871e1ce31 · 2025-10-21T21:15:33.000+02:00
diff --git a/README.md b/README.md
@@ -31,25 +31,25 @@ description: |
 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  frequency: 54h
+  frequency: 60h
 
 runs:
   - forecaster:
       mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
       label: Stage D - N320 global grid with CERRA finetuning
-      steps: 0/126/6
+      steps: 0/120/6
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      steps: 0/126/6
+      steps: 0/120/6
 
 
 baselines:
   - baseline:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/126/6
+      steps: 0/120/6
 
 analysis:
   label: COSMO KENDA
diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
@@ -4,16 +4,16 @@ description: |
   (KENDA-1) at 1km resolution.
 
 dates:
-  start: 2024-01-01T12:00
-  end: 2024-01-10T00:00
+  start: 2020-08-01T12:00
+  end: 2020-08-10T00:00
   frequency: 54h
 
 runs:
   - forecaster:
       mlflow_id: 2174c939c8844555a52843b71219d425
       label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11
-      config: resources/inference/configs/forecaster_no_trimedge_with_global.yaml
-      steps: 0/126/6
+      config: resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
+      steps: 0/120/6
       inference_resources:
         gpu: 4
         tasks: 4
@@ -25,7 +25,7 @@ baselines:
       baseline_id: COSMO-1E
       label: COSMO-1E
       root: /scratch/mch/bhendj/COSMO-1E
-      steps: 0/126/6
+      steps: 0/33/6
 
 analysis:
   label: COSMO KENDA
diff --git a/config/forecasters.yaml b/config/forecasters.yaml
@@ -1,30 +1,24 @@
 # yaml-language-server: $schema=../workflow/tools/config.schema.json
 description: |
-  This is an experiment to do blabla.
+  Evaluate skill of COSMO-E emulator (M-1 forecaster).
 
 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  frequency: 36h
+  frequency: 60h
 
 runs:
-  - forecaster:
-      mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
-      label: Stage D - N320 global grid with CERRA finetuning
-      config: resources/inference/configs/forecaster_with_global.yaml
-      steps: 0/126/6
   - forecaster:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
-      config: resources/inference/configs/forecaster_with_global.yaml
-      steps: 0/126/6
+      steps: 0/120/6
 
 baselines:
   - baseline:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/126/6
+      steps: 0/120/6
 
 analysis:
   label: COSMO KENDA
@@ -39,7 +33,7 @@ locations:
 profile:
   executor: slurm
   global_resources:
-    gpus: 15
+    gpus: 16
   default_resources:
     slurm_partition: "postproc"
     cpus_per_task: 1
diff --git a/config/interpolators.yaml b/config/interpolators.yaml
@@ -1,18 +1,18 @@
 # yaml-language-server: $schema=../workflow/tools/config.schema.json
 description: |
-  Stretched interpolator vs LAM interpolator.
+  Evaluate skill of SGM interpolator (M-2 interpolator).
 
 dates:
   start: 2020-01-01T12:00
   end: 2020-01-10T00:00
-  frequency: 54h
+  frequency: 60h
 
 runs:
   - interpolator:
-      mlflow_id: 9c18b90074214d769b8b383722fc5a06
+      mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
       label: M-2 interpolator (KENDA)
-      steps: 0/121/1
-      config: resources/inference/configs/interpolator_from_test_data.yaml
+      steps: 0/120/1
+      config: resources/inference/configs/interpolator_from_test_data_stretched.yaml
       forecaster: null
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
@@ -21,12 +21,12 @@ runs:
   - interpolator:
       mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
       label: M-2 interpolator (M-1 forecaster)
-      steps: 0/121/1
+      steps: 0/120/1
       config: resources/inference/configs/interpolator_stretched.yaml
       forecaster:
         mlflow_id: d0846032fc7248a58b089cbe8fa4c511
         config: resources/inference/configs/forecaster_with_global.yaml
-        steps: 0/126/6
+        steps: 0/120/6
       extra_dependencies:
         - git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
         - torch-geometric==2.6.1
@@ -35,14 +35,14 @@ runs:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
       config: resources/inference/configs/forecaster_with_global.yaml
-      steps: 0/126/6
+      steps: 0/120/6
 
 baselines:
   - baseline:
       baseline_id: COSMO-E-1h
       label: COSMO-E
       root: /scratch/mch/bhendj/COSMO-E
-      steps: 0/121/1
+      steps: 0/120/1
 
 analysis:
   label: COSMO KENDA
@@ -58,7 +58,7 @@ locations:
 profile:
   executor: slurm
   global_resources:
-    gpus: 15
+    gpus: 16
   default_resources:
     slurm_partition: "postproc"
     cpus_per_task: 1
diff --git a/config/showcase.yaml b/config/showcase.yaml
@@ -12,14 +12,14 @@ runs:
       mlflow_id: d0846032fc7248a58b089cbe8fa4c511
       label: M-1 forecaster
       config: resources/inference/configs/forecaster_with_global.yaml
-      steps: 0/126/6
+      steps: 0/120/6
 
 baselines:
   - baseline:
       baseline_id: COSMO-E
       label: COSMO-E
       root: /store_new/mch/msopr/ml/COSMO-E
-      steps: 0/12/6
+      steps: 0/120/6
 
 analysis:
   label: COSMO KENDA
diff --git a/pyproject.toml b/pyproject.toml
@@ -53,4 +53,4 @@ markers = [
 packages = [
   "src/evalml",
   "src/verification"
-]
+]
diff --git a/src/evalml/config.py b/src/evalml/config.py
@@ -74,8 +74,10 @@ class RunConfig(BaseModel):
         ...,
         description=(
             "Forecast lead times in hours, formatted as 'start/end/step'. "
-            "The range is half-open [start, end), meaning it includes the start  "
-            "but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours."
+            "The range includes the start lead time and continues with the given step "
+            "until reaching or exceeding the end lead time. "
+            "Example: '0/120/6' for lead times every 6 hours up to 120 h, "
+            "or '0/33/6' up to 30 h."
         ),
     )
     extra_dependencies: List[str] = Field(
@@ -102,15 +104,13 @@ def validate_steps(cls, v: str) -> str:
         try:
             start, end, step = map(int, parts)
         except ValueError:
-            raise ValueError("Start, end, and interval must be integers.")
-        if start >= end:
-            raise ValueError(f"Start ({start}) must be less than end ({end}).")
-        if step <= 0:
-            raise ValueError(f"Interval ({step}) must be a positive integer.")
-        if (end - start) % step != 0:
+            raise ValueError("Start, end, and step must be integers.")
+        if start > end:
             raise ValueError(
-                f"The step ({step}) must evenly divide the range ({end - start})."
+                f"Start ({start}) must be less than or equal to end ({end})."
             )
+        if step <= 0:
+            raise ValueError(f"Step ({step}) must be a positive integer.")
         return v
 
 
diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -63,7 +63,7 @@ rule sandbox_all:
     input:
         expand(
             rules.create_inference_sandbox.output.sandbox,
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),
 
 
@@ -73,7 +73,7 @@ rule run_inference_all:
         expand(
             OUT_ROOT / "data/runs/{run_id}/{init_time}/raw",
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),
 
 
@@ -82,7 +82,7 @@ rule verif_metrics_all:
         expand(
             rules.verif_metrics.output,
             init_time=[t.strftime("%Y%m%d%H%M") for t in REFTIMES],
-            run_id=collect_all_runs(),
+            run_id=collect_all_candidates(),
         ),
 
 
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
@@ -68,12 +68,13 @@ REFTIMES = _reftimes()
 
 
 def collect_all_runs():
-    """Collect all runs defined in the configuration."""
+    """Collect all runs defined in the configuration, including secondary runs."""
     runs = {}
     for run_entry in copy.deepcopy(config["runs"]):
         model_type = next(iter(run_entry))
         run_config = run_entry[model_type]
         run_config["model_type"] = model_type
+        run_config["is_candidate"] = True
         run_id = run_config["mlflow_id"][0:9]
 
         if model_type == "interpolator":
@@ -84,6 +85,7 @@ def collect_all_runs():
                 # Ensure a proper 'forecaster' entry exists with model_type
                 fore_cfg = copy.deepcopy(run_config["forecaster"])
                 fore_cfg["model_type"] = "forecaster"
+                fore_cfg["is_candidate"] = False  # exclude from outputs
                 runs[tail_id] = fore_cfg
             run_id = f"{run_id}-{tail_id}"
 
@@ -92,6 +94,16 @@ def collect_all_runs():
     return runs
 
 
+def collect_all_candidates():
+    """Return only the runs flagged as candidates, keyed by run id."""
+    # Runs lacking an 'is_candidate' entry are treated as non-candidates.
+    return {
+        run_id: run_config
+        for run_id, run_config in collect_all_runs().items()
+        if run_config.get("is_candidate", False)
+    }
+
+
 def collect_all_baselines():
     """Collect all baselines defined in the configuration."""
     baselines = {}
@@ -108,7 +120,8 @@ def collect_experiment_participants():
     for base in BASELINE_CONFIGS.keys():
         participants[base] = OUT_ROOT / f"data/baselines/{base}/verif_aggregated.nc"
     for exp in RUN_CONFIGS.keys():
-        participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc"
+        if RUN_CONFIGS[exp].get("is_candidate", False):
+            participants[exp] = OUT_ROOT / f"data/runs/{exp}/verif_aggregated.nc"
     return participants
 
 
diff --git a/workflow/rules/data.smk b/workflow/rules/data.smk
@@ -18,15 +18,15 @@ if "extract_cosmoe" in config.get("include-optional-rules", []):
             runtime="24h",
         params:
             year_postfix=lambda wc: f"FCST{wc.year}",
-            lead_time="0/126/6",
+            steps="0/120/6",
         log:
             OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log",
         shell:
             """
             python workflow/scripts/extract_baseline_fct.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
-                --lead_time {params.lead_time} \
+                --steps {params.steps} \
                     > {log} 2>&1
             """
 
@@ -45,14 +45,14 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []):
             runtime="24h",
         params:
             year_postfix=lambda wc: f"FCST{wc.year}",
-            lead_time="0/34/1",
+            steps="0/33/1",
         log:
             OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log",
         shell:
             """
             python workflow/scripts/extract_baseline_fct.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
-                --lead_time {params.lead_time} \
+                --steps {params.steps} \
                     > {log} 2>&1
             """
diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk
@@ -121,7 +121,6 @@ def get_resource(wc, field: str, default):
 def get_leadtime(wc):
     """Get the lead time from the run config."""
     start, end, step = RUN_CONFIGS[wc.run_id]["steps"].split("/")
-    end = int(end) - int(step)  # make inclusive
     return f"{end}h"
 
 
diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk
@@ -22,7 +22,7 @@ rule verif_metrics_baseline:
         analysis_zarr=config["analysis"].get("analysis_zarr"),
     params:
         baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"),
-        baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("steps"),
+        baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id]["steps"],
         analysis_label=config["analysis"].get("label"),
     output:
         OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc",
@@ -38,7 +38,7 @@ rule verif_metrics_baseline:
             --analysis_zarr {input.analysis_zarr} \
             --baseline_zarr {input.baseline_zarr} \
             --reftime {wildcards.init_time} \
-            --lead_time "{params.baseline_steps}" \
+            --steps "{params.baseline_steps}" \
             --baseline_label "{params.baseline_label}" \
             --analysis_label "{params.analysis_label}" \
             --output {output} > {log} 2>&1
@@ -79,7 +79,7 @@ rule verif_metrics:
         uv run {input.script} \
             --grib_output_dir {input.grib_output} \
             --analysis_zarr {input.analysis_zarr} \
-            --lead_time "{params.fcst_steps}" \
+            --steps "{params.fcst_steps}" \
             --fcst_label "{params.fcst_label}" \
             --analysis_label "{params.analysis_label}" \
             --output {output} > {log} 2>&1
diff --git a/workflow/scripts/extract_baseline.py b/workflow/scripts/extract_baseline.py
diff --git a/workflow/scripts/verif_baseline.py b/workflow/scripts/verif_baseline.py
diff --git a/workflow/scripts/verif_from_grib.py b/workflow/scripts/verif_from_grib.py
diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json