Implement regional evaluation (ecmwf#652)

mlangguth89 · web-flow · commit 089de3b388a4 · 2025-08-05T22:42:32.000+02:00
* Add RegionBoundingBox data class to score_utils.py to handle evaluation for different regions.

* Implement region-specific evaluation in plot_inference.py.

* Adapted utils.

* Introduction of clean RegionLibrary in score_utils.py.

* Formatted code with ruff.

* Updates following reviewer comments.

* Formatted code with ruff.
diff --git a/packages/evaluate/src/weathergen/evaluate/plot_inference.py b/packages/evaluate/src/weathergen/evaluate/plot_inference.py
@@ -38,9 +38,10 @@ def run_main(cfg: DictConfig) -> None:
     out_scores_dir.mkdir(parents=True, exist_ok=True)
 
     metrics = cfg.evaluation.metrics
+    regions = cfg.evaluation.get("regions", ["global"])
 
-    # to get a structure like: scores_dict[metric][stream][run_id] = plot
-    scores_dict = defaultdict(lambda: defaultdict(dict))
+    # to get a structure like: scores_dict[metric][region][stream][run_id] = plot
+    scores_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
 
     for run_id, run in runs.items():
         _logger.info(f"RUN {run_id}: Getting data...")
@@ -59,34 +60,37 @@ def run_main(cfg: DictConfig) -> None:
             if stream_dict.get("evaluation"):
                 _logger.info(f"Retrieve or compute scores for {run_id} - {stream}...")
 
-                metrics_to_compute = []
-                for metric in metrics:
-                    try:
-                        metric_data = retrieve_metric_from_json(
+                for region in regions:
+                    metrics_to_compute = []
+
+                    for metric in metrics:
+                        try:
+                            metric_data = retrieve_metric_from_json(
+                                out_scores_dir,
+                                run_id,
+                                stream,
+                                region,
+                                metric,
+                                run.epoch,
+                            )
+                            scores_dict[metric][region][stream][run_id] = metric_data
+                        except (FileNotFoundError, KeyError, ValueError):
+                            metrics_to_compute.append(metric)
+
+                    if metrics_to_compute:
+                        all_metrics, points_per_sample = calc_scores_per_stream(
+                            cfg, run_id, stream, region, metrics_to_compute
+                        )
+
+                        metric_list_to_json(
+                            [all_metrics],
+                            [points_per_sample],
+                            [stream],
+                            region,
                             out_scores_dir,
                             run_id,
-                            stream,
-                            metric,
                             run.epoch,
-                            run.rank,
                         )
-                        scores_dict[metric][stream][run_id] = metric_data
-                    except (FileNotFoundError, KeyError, ValueError):
-                        metrics_to_compute.append(metric)
-                if metrics_to_compute:
-                    all_metrics, points_per_sample = calc_scores_per_stream(
-                        cfg, run_id, stream, metrics_to_compute
-                    )
-
-                    metric_list_to_json(
-                        [all_metrics],
-                        [points_per_sample],
-                        [stream],
-                        out_scores_dir,
-                        run_id,
-                        run.epoch,
-                        run.rank,
-                    )
 
                     for metric in metrics_to_compute:
                         scores_dict[metric][stream][run_id] = all_metrics.sel(
diff --git a/packages/evaluate/src/weathergen/evaluate/score_utils.py b/packages/evaluate/src/weathergen/evaluate/score_utils.py
@@ -7,10 +7,16 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-from typing import Any
+import logging
+from dataclasses import dataclass
+from typing import Any, ClassVar
 
+import xarray as xr
 from omegaconf.listconfig import ListConfig
 
+_logger = logging.getLogger(__name__)
+_logger.setLevel(logging.INFO)
+
 
 def to_list(obj: Any) -> list:
     """
@@ -30,3 +36,100 @@ def to_list(obj: Any) -> list:
     elif not isinstance(obj, list):
         obj = [obj]
     return obj
+
+
+class RegionLibrary:
+    """
+    Predefined bounding boxes for known regions, as (lat_min, lat_max, lon_min, lon_max).
+    """
+
+    REGIONS: ClassVar[dict[str, tuple[float, float, float, float]]] = {
+        "global": (-90.0, 90.0, -180.0, 180.0),
+        "nhem": (0.0, 90.0, -180.0, 180.0),
+        "shem": (-90.0, 0.0, -180.0, 180.0),
+        "tropics": (-30.0, 30.0, -180.0, 180.0),
+    }
+
+
+@dataclass(frozen=True)
+class RegionBoundingBox:
+    lat_min: float  # lower latitude bound (inclusive), validated to [-90, 90]
+    lat_max: float  # upper latitude bound (inclusive), validated to [-90, 90]
+    lon_min: float  # lower longitude bound (inclusive), validated to [-180, 180]
+    lon_max: float  # upper longitude bound (inclusive), validated to [-180, 180]
+
+    def __post_init__(self):
+        """Validate the bounds as soon as the instance is created."""
+        self.validate()
+
+    def validate(self):
+        """Raise ValueError if a bound is out of range or a min is not below its max."""
+        if not (-90 <= self.lat_min <= 90 and -90 <= self.lat_max <= 90):
+            raise ValueError(
+                f"Latitude bounds must be between -90 and 90. Got: {self.lat_min}, {self.lat_max}"
+            )
+        if not (-180 <= self.lon_min <= 180 and -180 <= self.lon_max <= 180):
+            raise ValueError(
+                f"Longitude bounds must be between -180 and 180. Got: {self.lon_min}, {self.lon_max}"
+            )
+        if self.lat_min >= self.lat_max:
+            raise ValueError(
+                f"Latitude minimum must be less than maximum. Got: {self.lat_min}, {self.lat_max}"
+            )
+        if self.lon_min >= self.lon_max:
+            raise ValueError(
+                f"Longitude minimum must be less than maximum. Got: {self.lon_min}, {self.lon_max}"
+            )
+
+    def contains(self, lat: float, lon: float) -> bool:
+        """Check if a lat/lon point is within the bounding box (edges included)."""
+        return (self.lat_min <= lat <= self.lat_max) and (
+            self.lon_min <= lon <= self.lon_max
+        )
+
+    def apply_mask(
+        self,
+        data: xr.Dataset | xr.DataArray,
+        lat_name: str = "lat",
+        lon_name: str = "lon",
+        data_dim: str = "ipoint",
+    ) -> xr.Dataset | xr.DataArray:
+        """Keep only the entries along `data_dim` that lie inside the box (edges included).
+        Parameters
+        ----------
+        data :
+            The data to filter; `lat_name` and `lon_name` are read from it via data[...].
+        lat_name:
+            Name of the latitude coordinate in the data.
+        lon_name:
+            Name of the longitude coordinate in the data.
+        data_dim:
+            Name of the dimension that contains the lat/lon coordinates.
+
+        Returns
+        -------
+        Filtered data with only points within the bounding box.
+        """
+        # lat/lon coordinates should be 1D and aligned with data_dim
+        lat = data[lat_name]
+        lon = data[lon_name]
+
+        mask = (
+            (lat >= self.lat_min)
+            & (lat <= self.lat_max)
+            & (lon >= self.lon_min)
+            & (lon <= self.lon_max)
+        )
+
+        return data.sel({data_dim: mask})  # boolean mask used as indexer along data_dim
+
+    @classmethod
+    def from_region_name(cls, region: str) -> "RegionBoundingBox":
+        region = region.lower()  # lookup is case-insensitive; REGIONS keys are lowercase
+        try:
+            return cls(*RegionLibrary.REGIONS[region])
+        except KeyError as err:  # unknown name -> ValueError listing the supported regions
+            raise ValueError(
+                f"Region '{region}' is not supported. "
+                f"Available regions: {', '.join(RegionLibrary.REGIONS.keys())}"
+            ) from err
diff --git a/packages/evaluate/src/weathergen/evaluate/utils.py b/packages/evaluate/src/weathergen/evaluate/utils.py