Skip to content

Commit 98a7dfc

Browse files
dnerini and jonasbhend authored
Adopt forecast intervals including the end point (#71)
* Adopt forecast intervals including the end point * Fix parsing * Experiments work * Update config/forecasters.yaml * Align init times to availability of COE * run pre-commit * Change README to COSMO-E availability --------- Co-authored-by: Jonas Bhend <jonasbhend@users.noreply.github.com> Co-authored-by: Jonas Bhend <jonas.bhend@meteoswiss.ch>
1 parent 05d3276 commit 98a7dfc

File tree

13 files changed

+85
-103
lines changed

13 files changed

+85
-103
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,25 +31,25 @@ description: |
3131
dates:
3232
start: 2020-01-01T12:00
3333
end: 2020-01-10T00:00
34-
frequency: 54h
34+
frequency: 60h
3535

3636
runs:
3737
- forecaster:
3838
mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
3939
label: Stage D - N320 global grid with CERRA finetuning
40-
steps: 0/126/6
40+
steps: 0/120/6
4141
- forecaster:
4242
mlflow_id: d0846032fc7248a58b089cbe8fa4c511
4343
label: M-1 forecaster
44-
steps: 0/126/6
44+
steps: 0/120/6
4545

4646

4747
baselines:
4848
- baseline:
4949
baseline_id: COSMO-E
5050
label: COSMO-E
5151
root: /store_new/mch/msopr/ml/COSMO-E
52-
steps: 0/126/6
52+
steps: 0/120/6
5353

5454
analysis:
5555
label: COSMO KENDA

config/forecasters-co1e.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ runs:
1313
mlflow_id: 2174c939c8844555a52843b71219d425
1414
label: Cosmo 1km + era5 N320, finetuned on cerra checkpoint, lam resolution 11
1515
config: resources/inference/configs/forecaster_no_trimedge_fromtraining.yaml
16-
steps: 0/126/6
16+
steps: 0/120/6
1717
inference_resources:
1818
gpu: 4
1919
tasks: 4
@@ -23,7 +23,7 @@ baselines:
2323
baseline_id: COSMO-1E
2424
label: COSMO-1E
2525
root: /scratch/mch/bhendj/COSMO-1E
26-
steps: 0/126/6
26+
steps: 0/33/6
2727

2828
analysis:
2929
label: COSMO KENDA

config/forecasters.yaml

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,24 @@
11
# yaml-language-server: $schema=../workflow/tools/config.schema.json
22
description: |
3-
This is an experiment to do blabla.
3+
Evaluate skill of COSMO-E emulator (M-1 forecaster).
44
55
dates:
66
start: 2020-01-01T12:00
77
end: 2020-01-10T00:00
8-
# end: 2020-03-30T00:00
9-
frequency: 36h
8+
frequency: 60h
109

1110
runs:
12-
- forecaster:
13-
mlflow_id: 2f962c89ff644ca7940072fa9cd088ec
14-
label: Stage D - N320 global grid with CERRA finetuning
15-
steps: 0/126/6
1611
- forecaster:
1712
mlflow_id: d0846032fc7248a58b089cbe8fa4c511
1813
label: M-1 forecaster
19-
steps: 0/126/6
14+
steps: 0/120/6
2015

2116
baselines:
2217
- baseline:
2318
baseline_id: COSMO-E
2419
label: COSMO-E
2520
root: /store_new/mch/msopr/ml/COSMO-E
26-
steps: 0/126/6
21+
steps: 0/120/6
2722

2823
analysis:
2924
label: COSMO KENDA
@@ -38,7 +33,7 @@ locations:
3833
profile:
3934
executor: slurm
4035
global_resources:
41-
gpus: 15
36+
gpus: 16
4237
default_resources:
4338
slurm_partition: "postproc"
4439
cpus_per_task: 1

config/interpolators.yaml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
# yaml-language-server: $schema=../workflow/tools/config.schema.json
22
description: |
3-
Stretched interpolator vs LAM interpolator.
3+
Evaluate skill of SGM interpolator (M-2 interpolator).
44
55
dates:
66
start: 2020-01-01T12:00
77
end: 2020-01-10T00:00
8-
frequency: 54h
8+
frequency: 60h
99

1010
runs:
1111
- interpolator:
12-
mlflow_id: 9c18b90074214d769b8b383722fc5a06
12+
mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
1313
label: M-2 interpolator (KENDA)
14-
steps: 0/121/1
15-
config: resources/inference/configs/interpolator_from_test_data.yaml
14+
steps: 0/120/1
15+
config: resources/inference/configs/interpolator_from_test_data_stretched.yaml
1616
forecaster: null
1717
extra_dependencies:
1818
- git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
@@ -21,12 +21,12 @@ runs:
2121
- interpolator:
2222
mlflow_id: 8d1e0410ca7d4f74b368b3079878259a
2323
label: M-2 interpolator (M-1 forecaster)
24-
steps: 0/121/1
24+
steps: 0/120/1
2525
config: resources/inference/configs/interpolator_stretched.yaml
2626
forecaster:
2727
mlflow_id: d0846032fc7248a58b089cbe8fa4c511
2828
config: resources/inference/configs/forecaster_with_global.yaml
29-
steps: 0/126/6
29+
steps: 0/120/6
3030
extra_dependencies:
3131
- git+https://github.com/ecmwf/anemoi-inference@14189907b4f4e3b204b7994f828831b8aa51e9b6
3232
- torch-geometric==2.6.1
@@ -35,14 +35,14 @@ runs:
3535
mlflow_id: d0846032fc7248a58b089cbe8fa4c511
3636
label: M-1 forecaster
3737
config: resources/inference/configs/forecaster_with_global.yaml
38-
steps: 0/126/6
38+
steps: 0/120/6
3939

4040
baselines:
4141
- baseline:
4242
baseline_id: COSMO-E-1h
4343
label: COSMO-E
4444
root: /scratch/mch/bhendj/COSMO-E
45-
steps: 0/121/1
45+
steps: 0/120/1
4646

4747
analysis:
4848
label: COSMO KENDA
@@ -58,7 +58,7 @@ locations:
5858
profile:
5959
executor: slurm
6060
global_resources:
61-
gpus: 15
61+
gpus: 16
6262
default_resources:
6363
slurm_partition: "postproc"
6464
cpus_per_task: 1

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@ markers = [
5050
packages = [
5151
"src/evalml",
5252
"src/verification"
53-
]
53+
]

src/evalml/config.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ class RunConfig(BaseModel):
7474
...,
7575
description=(
7676
"Forecast lead times in hours, formatted as 'start/end/step'. "
77-
"The range is half-open [start, end), meaning it includes the start "
78-
"but excludes the end. Example: '0/126/6' for lead times every 6 hours up to 120 hours."
77+
"The range includes the start lead time and continues with the given step "
78+
"until reaching or exceeding the end lead time. "
79+
"Example: '0/120/6' for lead times every 6 hours up to 120 h, "
80+
"or '0/33/6' up to 30 h."
7981
),
8082
)
8183
extra_dependencies: List[str] = Field(
@@ -102,15 +104,13 @@ def validate_steps(cls, v: str) -> str:
102104
try:
103105
start, end, step = map(int, parts)
104106
except ValueError:
105-
raise ValueError("Start, end, and interval must be integers.")
106-
if start >= end:
107-
raise ValueError(f"Start ({start}) must be less than end ({end}).")
108-
if step <= 0:
109-
raise ValueError(f"Interval ({step}) must be a positive integer.")
110-
if (end - start) % step != 0:
107+
raise ValueError("Start, end, and step must be integers.")
108+
if start > end:
111109
raise ValueError(
112-
f"The step ({step}) must evenly divide the range ({end - start})."
110+
f"Start ({start}) must be less than or equal to end ({end})."
113111
)
112+
if step <= 0:
113+
raise ValueError(f"Step ({step}) must be a positive integer.")
114114
return v
115115

116116

workflow/rules/data.smk

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ if "extract_cosmoe" in config.get("include-optional-rules", []):
1818
runtime="24h",
1919
params:
2020
year_postfix=lambda wc: f"FCST{wc.year}",
21-
lead_time="0/126/6",
21+
steps="0/120/6",
2222
log:
2323
OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log",
2424
shell:
2525
"""
2626
python workflow/scripts/extract_baseline_fct.py \
2727
--archive_dir {input.archive}/{params.year_postfix} \
2828
--output_store {output.fcts} \
29-
--lead_time {params.lead_time} \
29+
--steps {params.steps} \
3030
> {log} 2>&1
3131
"""
3232

@@ -45,14 +45,14 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []):
4545
runtime="24h",
4646
params:
4747
year_postfix=lambda wc: f"FCST{wc.year}",
48-
lead_time="0/34/1",
48+
steps="0/33/1",
4949
log:
5050
OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log",
5151
shell:
5252
"""
5353
python workflow/scripts/extract_baseline_fct.py \
5454
--archive_dir {input.archive}/{params.year_postfix} \
5555
--output_store {output.fcts} \
56-
--lead_time {params.lead_time} \
56+
--steps {params.steps} \
5757
> {log} 2>&1
5858
"""

workflow/rules/inference.smk

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ def get_resource(wc, field: str, default):
121121
def get_leadtime(wc):
122122
"""Get the lead time from the run config."""
123123
start, end, step = RUN_CONFIGS[wc.run_id]["steps"].split("/")
124-
end = int(end) - int(step) # make inclusive
125124
return f"{end}h"
126125

127126

workflow/rules/verif.smk

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ rule verif_metrics_baseline:
2222
analysis_zarr=config["analysis"].get("analysis_zarr"),
2323
params:
2424
baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"),
25-
baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("steps"),
25+
baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id]["steps"],
2626
analysis_label=config["analysis"].get("label"),
2727
output:
2828
OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc",
@@ -38,7 +38,7 @@ rule verif_metrics_baseline:
3838
--analysis_zarr {input.analysis_zarr} \
3939
--baseline_zarr {input.baseline_zarr} \
4040
--reftime {wildcards.init_time} \
41-
--lead_time "{params.baseline_steps}" \
41+
--steps "{params.baseline_steps}" \
4242
--baseline_label "{params.baseline_label}" \
4343
--analysis_label "{params.analysis_label}" \
4444
--output {output} > {log} 2>&1
@@ -79,7 +79,7 @@ rule verif_metrics:
7979
uv run {input.script} \
8080
--grib_output_dir {input.grib_output} \
8181
--analysis_zarr {input.analysis_zarr} \
82-
--lead_time "{params.fcst_steps}" \
82+
--steps "{params.fcst_steps}" \
8383
--fcst_label "{params.fcst_label}" \
8484
--analysis_label "{params.analysis_label}" \
8585
--output {output} > {log} 2>&1

workflow/scripts/extract_baseline.py

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def check_reftime_consistency(tarfiles: list[Path], delta_h: int = 12):
4242

4343

4444
def extract(
45-
tar: Path, lead_time: list[int], run_id: str, params: list[str]
45+
tar: Path, lead_times: list[int], run_id: str, params: list[str]
4646
) -> xr.Dataset:
4747
LOG.info(f"Extracting fields from {tar}.")
4848
reftime = reftime_from_tarfile(tar)
@@ -54,7 +54,7 @@ def extract(
5454
raise ValueError("Currently only COSMO-E and COSMO-1E are supported.")
5555
tar_archive = tarfile.open(tar, mode="r:*")
5656
out = ekd.SimpleFieldList()
57-
for lt in lead_time:
57+
for lt in lead_times:
5858
filename = f"{tar.stem}/grib/{gribname}{lt:03}_{run_id}"
5959
LOG.info(f"Extracting {filename}.")
6060
stream = tar_archive.extractfile(filename)
@@ -79,23 +79,19 @@ def extract(
7979
class ScriptConfig(Namespace):
8080
archive_dir: Path
8181
output_store: Path
82-
lead_time: int
82+
steps: list[int]
8383
run_id: str
8484
params: list[str]
8585

8686

87-
def _parse_lead_time(lead_time: str) -> int:
88-
# check that lead_time is in the format "start/stop/step"
89-
if "/" not in lead_time:
90-
raise ValueError(
91-
f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
92-
)
93-
if len(lead_time.split("/")) != 3:
94-
raise ValueError(
95-
f"Expected lead_time in format 'start/stop/step', got '{lead_time}'"
96-
)
97-
98-
return list(range(*map(int, lead_time.split("/"))))
87+
def _parse_steps(steps: str) -> int:
88+
# check that steps is in the format "start/stop/step"
89+
if "/" not in steps:
90+
raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
91+
if len(steps.split("/")) != 3:
92+
raise ValueError(f"Expected steps in format 'start/stop/step', got '{steps}'")
93+
start, end, step = map(int, steps.split("/"))
94+
return list(range(start, end + 1, step))
9995

10096

10197
def main(cfg: ScriptConfig):
@@ -135,7 +131,7 @@ def main(cfg: ScriptConfig):
135131

136132
for i in indices:
137133
file = tarfiles[i]
138-
ds = extract(file, cfg.lead_time, cfg.run_id, cfg.params)
134+
ds = extract(file, cfg.steps, cfg.run_id, cfg.params)
139135

140136
LOG.info(f"Extracted: {ds}")
141137

@@ -167,7 +163,7 @@ def main(cfg: ScriptConfig):
167163
help="Path to the output zarr store.",
168164
)
169165

170-
parser.add_argument("--lead_time", type=_parse_lead_time, default="0/126/6")
166+
parser.add_argument("--steps", type=_parse_steps, default="0/120/6")
171167

172168
parser.add_argument("--run_id", type=str, default="000")
173169

@@ -193,10 +189,10 @@ def main(cfg: ScriptConfig):
193189
python workflow/scripts/extract_baseline_fct.py \
194190
--archive_dir /archive/mch/msopr/osm/COSMO-E/FCST20 \
195191
--output_store /store_new/mch/msopr/ml/COSMO-E/FCST20.zarr \
196-
--lead_time 0/126/6
192+
--steps 0/120/6
197193
198194
python workflow/scripts/extract_baseline_fct.py \
199195
--archive_dir /archive/mch/s83/osm/from_GPFS/COSMO-1E/FCST20 \
200196
--output_store /store_new/mch/msopr/ml/COSMO-1E/FCST20.zarr \
201-
--lead_time 0/34/1
197+
--steps 0/33/1
202198
"""

0 commit comments

Comments
 (0)