From c466b9c1f833d8b7b755839235e9031f4e4b0427 Mon Sep 17 00:00:00 2001
From: m-rempel
Date: Tue, 27 May 2025 10:30:12 +0000
Subject: [PATCH 1/4] Add importer for DWD radar products

---
 pysteps/io/importers.py | 349 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 349 insertions(+)

diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index 9cfaa17f..4a37f555 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -86,6 +86,7 @@
 import gzip
 import os
+import array
 from functools import partial
 
 import numpy as np
@@ -1617,3 +1618,351 @@ def _import_saf_crri_geodata(filename):
     ds_rainfall.close()
 
     return geodata
+
+
+@postprocess_import()
+def import_dwd_hdf5(filename, qty="RATE", **kwargs):
+    """
+    Import a DWD precipitation product field (and optionally the quality
+    field) from an HDF5 file conforming to the ODIM specification.
+
+    Parameters
+    ----------
+    filename: str
+        Name of the file to import.
+    qty: {'RATE', 'ACRR', 'DBZH'}
+        The quantity to read from the file. The currently supported identifiers
+        are: 'RATE'=instantaneous rain rate (mm/h), 'ACRR'=hourly rainfall
+        accumulation (mm) and 'DBZH'=max-reflectivity (dBZ). The default value
+        is 'RATE'.
+
+    {extra_kwargs_doc}
+
+    Returns
+    -------
+    out: tuple
+        A three-element tuple containing the DWD precipitation product with the
+        requested quantity and the associated quality field and metadata. The
+        quality field is read from the file if it contains a dataset whose
+        quantity identifier is 'QIND'.
+    """
+    if not H5PY_IMPORTED:
+        raise MissingOptionalDependency(
+            "h5py package is required to import "
+            "radar reflectivity composites using ODIM HDF5 specification "
+            "but it is not installed"
+        )
+
+    if qty not in ["ACRR", "DBZH", "RATE"]:
+        raise ValueError(
+            "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
+            % qty
+        )
+
+    f = h5py.File(filename, "r")
+
+    precip = None
+    quality = None
+
+    for dsg in f.items():
+        if dsg[0].startswith("dataset"):
+            what_grp_found = False
+            # check if the "what" group is in the "dataset" group
+            if "what" in list(dsg[1].keys()):
+                if "quantity" in dsg[1]["what"].attrs.keys():
+                    try:
+                        qty_, gain, offset, nodata, undetect = (
+                            _read_opera_hdf5_what_group(dsg[1]["what"])
+                        )
+                        what_grp_found = True
+                    except KeyError:
+                        pass
+
+            for dg in dsg[1].items():
+                if dg[0][0:4] == "data":
+                    # check if the "what" group is in the "data" group
+                    if "what" in list(dg[1].keys()):
+                        (
+                            qty_,
+                            gain,
+                            offset,
+                            nodata,
+                            undetect,
+                        ) = _read_opera_hdf5_what_group(dg[1]["what"])
+                    elif not what_grp_found:
+                        raise DataModelError(
+                            "Non ODIM compliant file: "
+                            "no what group found from {} "
+                            "or its subgroups".format(dg[0])
+                        )
+
+                    if qty_.decode() in [qty, "QIND"]:
+                        arr = dg[1]["data"][...]
+                        mask_n = arr == nodata
+                        mask_u = arr == undetect
+                        mask = np.logical_and(~mask_u, ~mask_n)
+
+                        if qty_.decode() == qty:
+                            precip = np.empty(arr.shape)
+                            precip[mask] = arr[mask] * gain + offset
+                            if qty != "DBZH":
+                                precip[mask_u] = offset
+                            else:
+                                precip[mask_u] = -30.0
+                            precip[mask_n] = np.nan
+                        elif qty_.decode() == "QIND":
+                            quality = np.empty(arr.shape, dtype=float)
+                            quality[mask] = arr[mask]
+                            quality[~mask] = np.nan
+
+    if precip is None:
+        raise IOError("requested quantity %s not found" % qty)
+
+    where = f["where"]
+    if isinstance(where.attrs["projdef"], str):
+        proj4str = where.attrs["projdef"]
+    else:
+        proj4str = where.attrs["projdef"].decode()
+    pr = pyproj.Proj(proj4str)
+
+    ll_lat = where.attrs["LL_lat"]
+    ll_lon = where.attrs["LL_lon"]
+    ur_lat = where.attrs["UR_lat"]
+    ur_lon = where.attrs["UR_lon"]
+    if (
+        "LR_lat" in where.attrs.keys()
+        and "LR_lon" in where.attrs.keys()
+        and "UL_lat" in where.attrs.keys()
+        and "UL_lon" in where.attrs.keys()
+    ):
+        lr_lat = float(where.attrs["LR_lat"])
+        lr_lon = float(where.attrs["LR_lon"])
+        ul_lat = float(where.attrs["UL_lat"])
+        ul_lon = float(where.attrs["UL_lon"])
+        full_cornerpts = True
+    else:
+        full_cornerpts = False
+
+    ll_x, ll_y = pr(ll_lon, ll_lat)
+    ur_x, ur_y = pr(ur_lon, ur_lat)
+
+    if full_cornerpts:
+        lr_x, lr_y = pr(lr_lon, lr_lat)
+        ul_x, ul_y = pr(ul_lon, ul_lat)
+        x1 = min(ll_x, ul_x)
+        y1 = min(ll_y, lr_y)
+        x2 = max(lr_x, ur_x)
+        y2 = max(ul_y, ur_y)
+    else:
+        x1 = ll_x
+        y1 = ll_y
+        x2 = ur_x
+        y2 = ur_y
+
+    dataset1 = f["dataset1"]
+
+    if "xscale" in where.attrs.keys() and "yscale" in where.attrs.keys():
+        xpixelsize = where.attrs["xscale"]
+        ypixelsize = where.attrs["yscale"]
+    elif (
+        "xscale" in dataset1["where"].attrs.keys()
+        and "yscale" in dataset1["where"].attrs.keys()
+    ):
+        where = dataset1["where"]
+        xpixelsize = where.attrs["xscale"]
+        ypixelsize = where.attrs["yscale"]
+    else:
+        xpixelsize = None
+        ypixelsize = None
+
+    if qty == "ACRR":
+        unit = "mm"
+        transform = None
+    elif qty == "DBZH":
+        unit = "dBZ"
+        transform = "dB"
+    else:
+        unit = "mm/h"
+        transform = None
+
+    metadata = {
+        "projection": proj4str,
+        "ll_lon": ll_lon,
+        "ll_lat": ll_lat,
+        "ur_lon": ur_lon,
+        "ur_lat": ur_lat,
+        "x1": x1,
+        "y1": y1,
+        "x2": x2,
+        "y2": y2,
+        "xpixelsize": xpixelsize,
+        "ypixelsize": ypixelsize,
+        "cartesian_unit": "m",
+        "yorigin": "lower",
+        "institution": "Deutscher Wetterdienst",
+        "accutime": None,
+        "unit": unit,
+        "transform": transform,
+        "zerovalue": np.nanmin(precip),
+        "threshold": _get_threshold_value(precip),
+    }
+
+    metadata.update(kwargs)
+
+    f.close()
+
+    return precip, quality, metadata
+
+
+@postprocess_import()
+def import_dwd_radolan(filename, product):
+    """
+    Import a RADOLAN precipitation product from a binary file.
+
+    Parameters
+    ----------
+    filename: str
+        Name of the file to import.
+    product: {'WX','RX','EX','RY','RW','AY','RS','YW','WN'}
+        The specific product to read from the file. Please see
+        https://www.dwd.de/DE/leistungen/radolan/radolan_info/
+        radolan_radvor_op_komposit_format_pdf.pdf
+        for a detailed description.
+
+    {extra_kwargs_doc}
+
+    Returns
+    -------
+    out: tuple
+        A three-element tuple containing the precipitation field imported
+        from a RADOLAN binary file and the associated quality field and metadata.
+        The quality field is currently set to None.
+ """ + size_file = os.path.getsize(filename) + size_data = np.round(size_file, -3) + size_header = size_file - size_data + + f = open(filename, "rb") + header = f.read(size_header).decode("utf-8") + + prod = header[:2] + + assert prod == product, "Product not in File!" + + prod_cat1 = np.array(["WX", "RX", "EX"]) + prod_cat2 = np.array(["RY", "RW", "AY", "RS", "YW"]) + + nbyte = 1 if prod in prod_cat1 else 2 + signed = "B" if prod in prod_cat1 else "H" + + fac = int(header.split("E-")[1].split("INT")[0]) + dimsplit = header.split("x") + dims = np.array((dimsplit[0][-4:], dimsplit[1][:4]), dtype=int)[::-1] + + data = array.array(signed) + data.fromfile(f, size_data // nbyte) + + f.close() + + data = np.array(np.reshape(data, dims, order="F"), dtype=float).T + + if prod == "SF": + no_echo_value = 0.0 + elif prod in prod_cat2: + no_echo_value = -0.01 + else: + no_echo_value = -32.5 + + if prod in prod_cat1: + data[data >= 249] = np.nan + data = data / 2.0 + no_echo_value + elif prod in prod_cat2: + data, no_data_mask = _identify_info_bits(data) + if prod == "AY": + data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + else: + data = (10 ** (fac * -1)) * data + no_echo_value + else: + data, no_data_mask = _identify_info_bits(data) + data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + + data[no_data_mask] = np.nan + + geodata = _import_dwd_geodata(product, dims) + metadata = geodata + + return data, None, metadata + + +def _identify_info_bits(data): + + # clutter + clutter_mask = data - 2**15 >= 0.0 + data[clutter_mask] = 0 + + # negative values + mask = data - 2**14 >= 0.0 + data[mask] -= 2**14 + + # no data + no_data_mask = data - 2**13 == 2500.0 + + if np.sum(no_data_mask) == 0.0: + + no_data_mask = data - 2**13 == 0.0 + + data[no_data_mask] = 0.0 + + # secondary data + data[data - 2**12 > 0.0] -= 2**12 + + data[mask] *= -1 + + return data, no_data_mask + + +def _import_dwd_geodata(product, dims): + """ + Geodata for RADOLAN products. Hard-coded here, cause there is only basic + information about the projection in the header of the binary RADOLAN + files. 
+ """ + + geodata = {} + + projdef = "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 +lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0" + geodata["projection"] = projdef + + geodata["xpixelsize"] = 1000.0 + geodata["ypixelsize"] = 1000.0 + geodata["cartesian_unit"] = "m" + geodata["yorigin"] = "lower" + + prod_cat1 = ["RY", "RW", "RX"] # 900x900 + prod_cat2 = ["WN"] # 1100x1200 + prod_cat3 = ["YW"] # 900x1100 + + if product in prod_cat1: # lower left + lon = 3.604382995 + lat = 46.95361536 + if product in prod_cat2: # lower left + lon = 3.566994635 + lat = 45.69642538 + if product in prod_cat3: # center + lon = 9.0 + lat = 51.0 + + pr = pyproj.Proj(projdef) + x1, y1 = pr(lon, lat) + if product in prod_cat3: + x1 -= dims[0] * 1000 // 2 + y1 -= dims[1] * 1000 // 2 - 80000 + + x2 = x1 + dims[0] * 1000 + y2 = y1 + dims[1] * 1000 + + geodata["x1"] = x1 + geodata["y1"] = y1 + geodata["x2"] = x2 + geodata["y2"] = y2 + + return geodata From 1e98e4f6b4d2e25df66c4a94dc4e5cc2ed6828ea Mon Sep 17 00:00:00 2001 From: m-rempel Date: Tue, 3 Jun 2025 08:53:29 +0000 Subject: [PATCH 2/4] Update DWD HDF5 Importer --- pysteps/io/importers.py | 225 ++++++++++++++++++++++------------------ 1 file changed, 126 insertions(+), 99 deletions(-) diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py index 4a37f555..52ebe66a 100644 --- a/pysteps/io/importers.py +++ b/pysteps/io/importers.py @@ -82,11 +82,14 @@ import_odim_hdf5 import_opera_hdf5 import_saf_crri + import_dwd_hdf5 + import_dwd_radolan """ import gzip import os import array +import datetime from functools import partial import numpy as np @@ -1663,91 +1666,46 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs): precip = None quality = None - for dsg in f.items(): - if dsg[0].startswith("dataset"): - what_grp_found = False - # check if the "what" group is in the "dataset" group - if "what" in list(dsg[1].keys()): - if "quantity" in dsg[1]["what"].attrs.keys(): - try: - qty_, gain, offset, nodata, undetect = ( - _read_opera_hdf5_what_group(dsg[1]["what"]) - ) - what_grp_found = True - except KeyError: - pass + file_content = {} + _read_hdf5_cont(f, file_content) + f.close() - for dg in dsg[1].items(): - if dg[0][0:4] == "data": - # check if the "what" group is in the "data" group - if "what" in list(dg[1].keys()): - ( - qty_, - gain, - offset, - nodata, - undetect, - ) = _read_opera_hdf5_what_group(dg[1]["what"]) - elif not what_grp_found: - raise DataModelError( - "Non ODIM compliant file: " - "no what group found from {} " - "or its subgroups".format(dg[0]) - ) + data_prop = {} + _get_whatgrp(file_content, data_prop) - if qty_.decode() in [qty, "QIND"]: - arr = dg[1]["data"][...] 
-                        mask_n = arr == nodata
-                        mask_u = arr == undetect
-                        mask = np.logical_and(~mask_u, ~mask_n)
+    arr = file_content["dataset1"]["data1"]["data"]
+    mask_n = arr == data_prop["nodata"]
+    mask_u = arr == data_prop["undetect"]
+    mask = np.logical_and(~mask_u, ~mask_n)
 
-                        if qty_.decode() == qty:
-                            precip = np.empty(arr.shape)
-                            precip[mask] = arr[mask] * gain + offset
-                            if qty != "DBZH":
-                                precip[mask_u] = offset
-                            else:
-                                precip[mask_u] = -30.0
-                            precip[mask_n] = np.nan
-                        elif qty_.decode() == "QIND":
-                            quality = np.empty(arr.shape, dtype=float)
-                            quality[mask] = arr[mask]
-                            quality[~mask] = np.nan
+    if data_prop["quantity"] == qty:
+        precip = np.empty(arr.shape)
+        precip[mask] = arr[mask] * data_prop["gain"] + data_prop["offset"]
+        if qty != "DBZH":
+            precip[mask_u] = data_prop["offset"]
+        else:
+            precip[mask_u] = -32.5
+        precip[mask_n] = np.nan
+    elif data_prop["quantity"] == "QIND":
+        quality = np.empty(arr.shape, dtype=float)
+        quality[mask] = arr[mask]
+        quality[~mask] = np.nan
 
     if precip is None:
         raise IOError("requested quantity %s not found" % qty)
 
-    where = f["where"]
-    if isinstance(where.attrs["projdef"], str):
-        proj4str = where.attrs["projdef"]
-    else:
-        proj4str = where.attrs["projdef"].decode()
-    pr = pyproj.Proj(proj4str)
+    pr = pyproj.Proj(file_content["where"]["projdef"])
 
-    ll_lat = where.attrs["LL_lat"]
-    ll_lon = where.attrs["LL_lon"]
-    ur_lat = where.attrs["UR_lat"]
-    ur_lon = where.attrs["UR_lon"]
-    if (
-        "LR_lat" in where.attrs.keys()
-        and "LR_lon" in where.attrs.keys()
-        and "UL_lat" in where.attrs.keys()
-        and "UL_lon" in where.attrs.keys()
-    ):
-        lr_lat = float(where.attrs["LR_lat"])
-        lr_lon = float(where.attrs["LR_lon"])
-        ul_lat = float(where.attrs["UL_lat"])
-        ul_lon = float(where.attrs["UL_lon"])
-        full_cornerpts = True
-    else:
-        full_cornerpts = False
-
-    ll_x, ll_y = pr(ll_lon, ll_lat)
-    ur_x, ur_y = pr(ur_lon, ur_lat)
+    ll_x, ll_y = pr(file_content["where"]["LL_lon"], file_content["where"]["LL_lat"])
+    ur_x, ur_y = pr(file_content["where"]["UR_lon"], file_content["where"]["UR_lat"])
 
-    if full_cornerpts:
-        lr_x, lr_y = pr(lr_lon, lr_lat)
-        ul_x, ul_y = pr(ul_lon, ul_lat)
+    if len([k for k in file_content["where"].keys() if "_lat" in k]) == 4:
+        lr_x, lr_y = pr(
+            file_content["where"]["LR_lon"], file_content["where"]["LR_lat"]
+        )
+        ul_x, ul_y = pr(
+            file_content["where"]["UL_lon"], file_content["where"]["UL_lat"]
+        )
         x1 = min(ll_x, ul_x)
         y1 = min(ll_y, lr_y)
         x2 = max(lr_x, ur_x)
@@ -1758,18 +1716,15 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         x2 = ur_x
         y2 = ur_y
 
-    dataset1 = f["dataset1"]
-
-    if "xscale" in where.attrs.keys() and "yscale" in where.attrs.keys():
-        xpixelsize = where.attrs["xscale"]
-        ypixelsize = where.attrs["yscale"]
-    elif (
-        "xscale" in dataset1["where"].attrs.keys()
-        and "yscale" in dataset1["where"].attrs.keys()
+    if (
+        "where" in file_content["dataset1"].keys()
+        and "xscale" in file_content["dataset1"]["where"].keys()
     ):
-        where = dataset1["where"]
-        xpixelsize = where.attrs["xscale"]
-        ypixelsize = where.attrs["yscale"]
+        xpixelsize = file_content["dataset1"]["where"]["xscale"]
+        ypixelsize = file_content["dataset1"]["where"]["yscale"]
+    elif "xscale" in file_content["where"].keys():
+        xpixelsize = file_content["where"]["xscale"]
+        ypixelsize = file_content["where"]["yscale"]
    else:
         xpixelsize = None
         ypixelsize = None
@@ -1784,12 +1739,22 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         unit = "mm/h"
         transform = None
 
+    startdate = datetime.datetime.strptime(
f"{file_content['dataset1']['what']['startdate']}{file_content['dataset1']['what']['starttime']}", + "%Y%m%d%H%M%S", + ) + enddate = datetime.datetime.strptime( + f"{file_content['dataset1']['what']['enddate']}{file_content['dataset1']['what']['endtime']}", + "%Y%m%d%H%M%S", + ) + accutime = (enddate - startdate).total_seconds() / 60.0 + metadata = { - "projection": proj4str, - "ll_lon": ll_lon, - "ll_lat": ll_lat, - "ur_lon": ur_lon, - "ur_lat": ur_lat, + "projection": file_content["where"]["projdef"], + "ll_lon": file_content["where"]["LL_lon"], + "ll_lat": file_content["where"]["LL_lat"], + "ur_lon": file_content["where"]["UR_lon"], + "ur_lat": file_content["where"]["UR_lat"], "x1": x1, "y1": y1, "x2": x2, @@ -1798,8 +1763,8 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs): "ypixelsize": ypixelsize, "cartesian_unit": "m", "yorigin": "lower", - "institution": "Deutscher Wetterdienst", - "accutime": None, + "institution": file_content["what"]["source"], + "accutime": accutime, "unit": unit, "transform": transform, "zerovalue": np.nanmin(precip), @@ -1813,6 +1778,68 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs): return precip, quality, metadata +def _read_hdf5_cont(f, d): + """ + Recursively read nested dictionaries from a HDF5 file. + """ + + # set simple types of hdf content + group_type = h5py._hl.group.Group + + for key, value in f.items(): + + if isinstance(value, group_type): + + d[key] = {} + + if len(list(value.items())) > 0: + + _read_hdf5_cont(value, d[key]) + + else: + + d[key] = {attr: value.attrs[attr] for attr in value.attrs} + d[key] = { + k: v.decode() if type(v) == np.bytes_ else v + for k, v in d[key].items() + } + + else: + + try: + + d[key] = np.array(value) + + except TypeError: + + d[key] = np.array(value.astype(float)) + + except: + + d[key] = value.value + + return + + +def _get_whatgrp(d, g): + """ + Recursively get the what group containing the data field properties. + """ + if "what" in d.keys(): + if "gain" in d["what"].keys(): + g.update(d["what"]) + else: + k = [k for k in d.keys() if "data" in k][0] + _get_whatgrp(d[k], g) + else: + raise DataModelError( + "Non ODIM compliant file: " + "no what group found from {} " + "or its subgroups".format(d.keys()[0]) + ) + return + + @postprocess_import() def import_dwd_radolan(filename, product): """ @@ -1848,8 +1875,8 @@ def import_dwd_radolan(filename, product): assert prod == product, "Product not in File!" 
From 9d47508642153749ca47b8b16d7f13ea3e19f97d Mon Sep 17 00:00:00 2001
From: m-rempel
Date: Tue, 24 Jun 2025 14:32:09 +0000
Subject: [PATCH 3/4] Add reprojection of unstructured grids

---
 pysteps/io/importers.py       |  17 +++--
 pysteps/utils/reprojection.py | 123 ++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index 52ebe66a..a9478354 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -1656,6 +1656,13 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
             "but it is not installed"
         )
 
+    if not PYPROJ_IMPORTED:
+        raise MissingOptionalDependency(
+            "pyproj package is required to import "
+            "DWD's radar reflectivity composite "
+            "but it is not installed"
+        )
+
     if qty not in ["ACRR", "DBZH", "RATE"]:
         raise ValueError(
             "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
             % qty
         )
@@ -1740,11 +1747,13 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     startdate = datetime.datetime.strptime(
-        f"{file_content['dataset1']['what']['startdate']}{file_content['dataset1']['what']['starttime']}",
+        file_content["dataset1"]["what"]["startdate"]
+        + file_content["dataset1"]["what"]["starttime"],
         "%Y%m%d%H%M%S",
     )
     enddate = datetime.datetime.strptime(
-        f"{file_content['dataset1']['what']['enddate']}{file_content['dataset1']['what']['endtime']}",
+        file_content["dataset1"]["what"]["enddate"]
+        + file_content["dataset1"]["what"]["endtime"],
         "%Y%m%d%H%M%S",
     )
     accutime = (enddate - startdate).total_seconds() / 60.0
@@ -1762,7 +1771,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         "xpixelsize": xpixelsize,
         "ypixelsize": ypixelsize,
         "cartesian_unit": "m",
-        "yorigin": "lower",
+        "yorigin": "upper",
         "institution": file_content["what"]["source"],
         "accutime": accutime,
         "unit": unit,
@@ -1962,7 +1971,7 @@ def _import_dwd_geodata(product, dims):
     geodata["xpixelsize"] = 1000.0
     geodata["ypixelsize"] = 1000.0
     geodata["cartesian_unit"] = "m"
-    geodata["yorigin"] = "lower"
+    geodata["yorigin"] = "upper"
 
     prod_cat1 = ["RX", "RY", "RW"]  # 900x900
     prod_cat2 = ["WN"]  # 1200x1100
diff --git a/pysteps/utils/reprojection.py b/pysteps/utils/reprojection.py
index 6144a52c..0123776e 100644
--- a/pysteps/utils/reprojection.py
+++ b/pysteps/utils/reprojection.py
@@ -12,8 +12,10 @@
     reproject_grids
 """
 from pysteps.exceptions import MissingOptionalDependency
+from scipy.interpolate import griddata
 
 import numpy as np
+import xarray as xr
 
 try:
     from rasterio import Affine as A
@@ -23,6 +25,13 @@
 except ImportError:
     RASTERIO_IMPORTED = False
 
+try:
+    import pyproj
+
+    PYPROJ_IMPORTED = True
+except ImportError:
+    PYPROJ_IMPORTED = False
+
 
 def reproject_grids(src_array, dst_array, metadata_src, metadata_dst):
     """
@@ -118,3 +127,117 @@ def reproject_grids(src_array, dst_array, metadata_src, metadata_dst):
             metadata[key] = metadata_dst[key]
     return r_rprj, metadata
+
+
+def unstructured2regular(src_array, metadata_src, metadata_dst):
+    """
+    Reproject unstructured data onto a regular grid.
+
+    Parameters
+    ----------
+    src_array: xarray.DataArray
+        DataArray of shape (t, n_ens, ngridcells) containing a time
+        series of precipitation ensemble forecasts. These precipitation fields
+        will be reprojected.
+    metadata_src: dict
+        Metadata dictionary containing the projection, clon, clat, and ngridcells
+        attributes of the src_array as described in the documentation of
+        :py:mod:`pysteps.io.importers`.
+    metadata_dst: dict
+        Metadata dictionary containing the projection, x- and ypixelsize, x1, x2,
+        y1, y2, yorigin and cartesian_unit attributes of the dst_array.
+
+    Returns
+    -------
+    xr_rprj: xarray.DataArray
+        Four-dimensional array of shape (t, n_ens, y, x) containing the
+        precipitation fields of src_array, but reprojected to the domain
+        of dst_array.
+    metadata: dict
+        Metadata dictionary as metadata_src, but with the projection, pixel
+        sizes, corner coordinates, yorigin and cartesian_unit taken from
+        metadata_dst.
+    """
+
+    if not PYPROJ_IMPORTED:
+        raise MissingOptionalDependency(
+            "pyproj package is required to reproject DWD's NWP data "
+            "but it is not installed"
+        )
+
+    # Get number of grid cells
+    Nc = src_array["longitude"].shape[0]
+    ic_in = np.arange(Nc)
+
+    # Get cartesian coordinates of destination grid
+    x_dst = np.arange(
+        np.float32(metadata_dst["x1"]),
+        np.float32(metadata_dst["x2"]),
+        metadata_dst["xpixelsize"],
+    )
+
+    y_dst = np.arange(
+        np.float32(metadata_dst["y1"]),
+        np.float32(metadata_dst["y2"]),
+        metadata_dst["ypixelsize"],
+    )
+
+    if metadata_dst["yorigin"] == "upper":
+        y_dst = y_dst[::-1]
+
+    xx_dst, yy_dst = np.meshgrid(x_dst, y_dst)
+    s_out = yy_dst.shape
+    P_out = np.array((xx_dst.flatten(), yy_dst.flatten())).T
+
+    # Extract the grid info from src_array, assuming that src and dst share
+    # the same projection
+    pr = pyproj.Proj(metadata_dst["projection"])
+    x_src, y_src = pr(src_array["longitude"].values, src_array["latitude"].values)
+    P_in = np.stack((x_src, y_src)).T
+
+    ic_out = (
+        griddata(P_in, ic_in.flatten(), P_out, method="nearest")
+        .reshape(s_out)
+        .astype(int)
+    )
+
+    data_rprj = np.array(
+        [
+            [src_array[i, j].values[ic_out] for j in range(src_array.shape[1])]
+            for i in range(src_array.shape[0])
+        ]
+    )
+    dims = ["time", "ens_no", "south_north", "west_east"]
+    lon, lat = pr(xx_dst, yy_dst, inverse=True)
+    coords = {
+        "time": src_array["time"],
+        "ens_no": src_array["ens_no"],
+        "west_east": np.arange(0, len(x_dst), 1),
+        "south_north": np.arange(0, len(y_dst), 1),
+        "x": ("west_east", np.arange(0, len(x_dst), 1), {"units": "1"}),
+        "y": ("south_north", np.arange(0, len(y_dst), 1), {"units": "1"}),
+        "projection_x_coordinate": ("west_east", x_dst, {"units": "m"}),
+        "projection_y_coordinate": ("south_north", y_dst, {"units": "m"}),
+        "longitude": (
+            ["south_north", "west_east"],
+            lon,
+            {"units": "degrees_east"},
+        ),
+        "latitude": (["south_north", "west_east"], lat, {"units": "degrees_north"}),
+    }
+    xr_rprj = xr.DataArray(data=data_rprj, dims=dims, coords=coords)
+
+    # Update the metadata
+    metadata = metadata_src.copy()
+
+    for key in [
+        "projection",
+        "yorigin",
+        "xpixelsize",
+        "ypixelsize",
+        "x1",
+        "x2",
+        "y1",
+        "y2",
+        "cartesian_unit",
+    ]:
+        metadata[key] = metadata_dst[key]
+
+    return xr_rprj, metadata
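A minimal sketch of how the new unstructured2regular is meant to be called, on synthetic unstructured input (every coordinate, size and target-grid value below is an illustrative assumption, not taken from real DWD files):

    import numpy as np
    import xarray as xr
    from pysteps.utils.reprojection import unstructured2regular

    n_cells = 500
    rng = np.random.default_rng(42)
    src = xr.DataArray(
        rng.random((2, 3, n_cells)),  # (time, ens_no, grid cell)
        dims=("time", "ens_no", "cell"),
        coords={
            "time": [0, 1],
            "ens_no": [0, 1, 2],
            "longitude": ("cell", rng.uniform(6.0, 12.0, n_cells)),
            "latitude": ("cell", rng.uniform(48.0, 53.0, n_cells)),
        },
    )
    metadata_dst = {
        "projection": "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 "
        "+lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0",
        "x1": -200000.0, "y1": -4600000.0, "x2": 200000.0, "y2": -4200000.0,
        "xpixelsize": 10000.0, "ypixelsize": 10000.0,
        "yorigin": "upper", "cartesian_unit": "m",
    }
    field, meta = unstructured2regular(src, {}, metadata_dst)
    print(field.shape)  # (2, 3, 40, 40), i.e. (time, ens_no, y, x)

The nearest-neighbour lookup via scipy's griddata assigns each destination pixel the value of the closest source cell, so the sketch runs even when the synthetic points only sparsely cover the target grid.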
From f4b150666801165dcf5069218cbcbf45dbfd909d Mon Sep 17 00:00:00 2001
From: Ruben Imhoff
Date: Tue, 1 Jul 2025 16:30:54 +0200
Subject: [PATCH 4/4] docs: add docstrings to function and run black

---
 pysteps/io/importers.py | 151 ++++++++++++++++++++++++++++------------
 1 file changed, 106 insertions(+), 45 deletions(-)

diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index a9478354..42d1be9c 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -1668,8 +1668,8 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
             "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
             % qty
         )
 
+    # Read the data
     f = h5py.File(filename, "r")
-
     precip = None
     quality = None
 
@@ -1685,6 +1685,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     mask_u = arr == data_prop["undetect"]
     mask = np.logical_and(~mask_u, ~mask_n)
 
+    # Read the precipitation variable and quantity from the data
     if data_prop["quantity"] == qty:
         precip = np.empty(arr.shape)
         precip[mask] = arr[mask] * data_prop["gain"] + data_prop["offset"]
@@ -1701,8 +1702,8 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     if precip is None:
         raise IOError("requested quantity %s not found" % qty)
 
+    # Get the projection and grid information from the HDF5 file
     pr = pyproj.Proj(file_content["where"]["projdef"])
-
     ll_x, ll_y = pr(file_content["where"]["LL_lon"], file_content["where"]["LL_lat"])
     ur_x, ur_y = pr(file_content["where"]["UR_lon"], file_content["where"]["UR_lat"])
 
@@ -1723,6 +1724,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         x2 = ur_x
         y2 = ur_y
 
+    # Get the grid cell size
     if (
         "where" in file_content["dataset1"].keys()
         and "xscale" in file_content["dataset1"]["where"].keys()
@@ -1736,6 +1738,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         xpixelsize = None
         ypixelsize = None
 
+    # Get the unit and transform
     if qty == "ACRR":
         unit = "mm"
         transform = None
@@ -1746,6 +1749,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         unit = "mm/h"
         transform = None
 
+    # Extract the time step
     startdate = datetime.datetime.strptime(
         file_content["dataset1"]["what"]["startdate"]
         + file_content["dataset1"]["what"]["starttime"],
@@ -1758,6 +1762,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     )
     accutime = (enddate - startdate).total_seconds() / 60.0
 
+    # Finally, fill out the metadata
     metadata = {
         "projection": file_content["where"]["projdef"],
         "ll_lon": file_content["where"]["LL_lon"],
@@ -1790,23 +1795,30 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
 def _read_hdf5_cont(f, d):
     """
     Recursively read nested dictionaries from an HDF5 file.
-    """
+
+    Parameters
+    ----------
+    f : h5py.Group or h5py.File
+        The current group or file object from which to read data.
+    d : dict
+        The dictionary to populate with the contents of the HDF5 group.
+
+    Returns
+    -------
+    None.
+    """
-
     # set simple types of hdf content
     group_type = h5py._hl.group.Group
 
     for key, value in f.items():
-
         if isinstance(value, group_type):
-
             d[key] = {}
-
             if len(list(value.items())) > 0:
-
+                # Recurse into non-empty group
                 _read_hdf5_cont(value, d[key])
-
             else:
-
+                # Handle empty group with attributes
                 d[key] = {attr: value.attrs[attr] for attr in value.attrs}
                 d[key] = {
                     k: v.decode() if type(v) == np.bytes_ else v
@@ -1814,17 +1826,11 @@ def _read_hdf5_cont(f, d):
                 }
 
         else:
-
             try:
-
                 d[key] = np.array(value)
-
             except TypeError:
-
                 d[key] = np.array(value.astype(float))
-
             except:
-
                 d[key] = value.value
 
     return
@@ -1873,34 +1879,41 @@ def import_dwd_radolan(filename, product):
         from a RADOLAN binary file and the associated quality field and metadata.
         The quality field is currently set to None.
""" + # Determine file size and header size size_file = os.path.getsize(filename) size_data = np.round(size_file, -3) size_header = size_file - size_data + # Read the file f = open(filename, "rb") header = f.read(size_header).decode("utf-8") + # Check if the product code is the same as the provided product variable prod = header[:2] - assert prod == product, "Product not in File!" + # Define product categories prod_cat1 = np.array(["WX", "RX"]) prod_cat2 = np.array(["RY", "RW", "YW"]) + # Determine byte size and data type nbyte = 1 if prod in prod_cat1 else 2 signed = "B" if prod in prod_cat1 else "H" + # Extract the scaling factor and grid dimensions fac = int(header.split("E-")[1].split("INT")[0]) dimsplit = header.split("x") dims = np.array((dimsplit[0][-4:], dimsplit[1][:4]), dtype=int)[::-1] + # Read binary data data = array.array(signed) data.fromfile(f, size_data // nbyte) - f.close() + # Reshape and transpose data to match grid layout data = np.array(np.reshape(data, dims, order="F"), dtype=float).T + # Define no-echo values based on product type if prod == "SF": no_echo_value = 0.0 elif prod in prod_cat2: @@ -1908,21 +1921,24 @@ def import_dwd_radolan(filename, product): else: no_echo_value = -32.5 + # Apply scaling and handle missing data if prod in prod_cat1: data[data >= 249] = np.nan data = data / 2.0 + no_echo_value elif prod in prod_cat2: data, no_data_mask = _identify_info_bits(data) if prod == "AY": - data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + data = (10 ** (-fac)) * data / 2.0 + no_echo_value else: - data = (10 ** (fac * -1)) * data + no_echo_value + data = (10 ** (-fac)) * data + no_echo_value else: data, no_data_mask = _identify_info_bits(data) - data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + data = (10 ** (-fac)) * data / 2.0 + no_echo_value + # Mask out no-data values data[no_data_mask] = np.nan + # Load geospatial metadata geodata = _import_dwd_geodata(product, dims) metadata = geodata @@ -1930,27 +1946,47 @@ def import_dwd_radolan(filename, product): def _identify_info_bits(data): + """ + Identifies and processes information bits embedded in RADOLAN data values. - # clutter + This function decodes metadata flags embedded in the RADOLAN data array, such as: + - Clutter (bit 15) + - Negative values (bit 14) + - No data (bit 13) + - Secondary data (bit 12) + + Parameters + ---------- + data : np.ndarray + The raw RADOLAN data array containing encoded information bits. + + Returns + ------- + tuple + A tuple containing: + - data : np.ndarray + The cleaned and decoded data array. + - no_data_mask : np.ndarray + A boolean mask indicating positions of no-data values. + """ + # Identify and remove clutter (bit 15) clutter_mask = data - 2**15 >= 0.0 data[clutter_mask] = 0 - # negative values + # Identify and convert negative values (bit 14) mask = data - 2**14 >= 0.0 data[mask] -= 2**14 - # no data + # Identify no-data values (bit 13) no_data_mask = data - 2**13 == 2500.0 - if np.sum(no_data_mask) == 0.0: - no_data_mask = data - 2**13 == 0.0 - data[no_data_mask] = 0.0 - # secondary data + # Identify and remove secondary data flag (bit 12) data[data - 2**12 > 0.0] -= 2**12 + # Apply negative sign to previously marked negative values data[mask] *= -1 return data, no_data_mask @@ -1958,41 +1994,66 @@ def _identify_info_bits(data): def _import_dwd_geodata(product, dims): """ - Geodata for RADOLAN products. Hard-coded here, cause there is only basic - information about the projection in the header of the binary RADOLAN - files. 
- """ + Generate geospatial metadata for RADOLAN precipitation products. + Since RADOLAN binary files contain only limited projection metadata, + this function provides hard-coded geospatial definitions and calculates + the bounding box of the data grid based on the product type and dimensions. + + Parameters + ---------- + product : str + The RADOLAN product code (e.g., 'RX', 'WX', 'WN', etc.). + dims : tuple of int + The dimensions of the data grid (rows, columns). + + Returns + ------- + geodata : dict + A dictionary containing: + - 'projection': PROJ.4 string defining the stereographic projection. + - 'xpixelsize', 'ypixelsize': Pixel size in meters. + - 'cartesian_unit': Unit of the coordinate system (meters). + - 'yorigin': Origin of the y-axis ('upper'). + - 'x1', 'y1': Coordinates of the lower-left corner. + - 'x2', 'y2': Coordinates of the upper-right corner. + """ geodata = {} - projdef = "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 +lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0" + # Define stereographic projection used by RADOLAN + projdef = ( + "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 " + "+lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0" + ) geodata["projection"] = projdef - geodata["xpixelsize"] = 1000.0 geodata["ypixelsize"] = 1000.0 geodata["cartesian_unit"] = "m" geodata["yorigin"] = "upper" - prod_cat1 = ["RX", "RY", "RW"] # 900x900 - prod_cat2 = ["WN"] # 1200x1100 - prod_cat3 = ["WX", "YW"] # 1100x900 + # Define product categories and their reference points + prod_cat1 = ["RX", "RY", "RW"] + prod_cat2 = ["WN"] + prod_cat3 = ["WX", "YW"] - if product in prod_cat1: # lower left - lon = 3.604382995 - lat = 46.95361536 - if product in prod_cat2: # lower left - lon = 3.566994635 - lat = 45.69642538 - if product in prod_cat3: # center - lon = 9.0 - lat = 51.0 + # Assign reference coordinates based on product type + if product in prod_cat1: + lon, lat = 3.604382995, 46.95361536 + elif product in prod_cat2: + lon, lat = 3.566994635, 45.69642538 + elif product in prod_cat3: + lon, lat = 9.0, 51.0 + # Project reference coordinates to Cartesian system pr = pyproj.Proj(projdef) x1, y1 = pr(lon, lat) + + # Adjust origin for center-based products if product in prod_cat3: x1 -= dims[0] * 1000 // 2 y1 -= dims[1] * 1000 // 2 - 80000 + # Calculate bounding box x2 = x1 + dims[0] * 1000 y2 = y1 + dims[1] * 1000