From c466b9c1f833d8b7b755839235e9031f4e4b0427 Mon Sep 17 00:00:00 2001
From: m-rempel
Date: Tue, 27 May 2025 10:30:12 +0000
Subject: [PATCH 1/4] Add importer for DWD radar products

---
 pysteps/io/importers.py | 349 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 349 insertions(+)

diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index 9cfaa17f..4a37f555 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -86,6 +86,7 @@
 import gzip
 import os
+import array
 from functools import partial
 
 import numpy as np
@@ -1617,3 +1618,351 @@ def _import_saf_crri_geodata(filename):
     ds_rainfall.close()
 
     return geodata
+
+
+@postprocess_import()
+def import_dwd_hdf5(filename, qty="RATE", **kwargs):
+    """
+    Import a DWD precipitation product field (and optionally the quality
+    field) from an HDF5 file conforming to the ODIM specification.
+
+    Parameters
+    ----------
+    filename: str
+        Name of the file to import.
+    qty: {'RATE', 'ACRR', 'DBZH'}
+        The quantity to read from the file. The currently supported identifiers
+        are: 'RATE'=instantaneous rain rate (mm/h), 'ACRR'=hourly rainfall
+        accumulation (mm) and 'DBZH'=max-reflectivity (dBZ). The default value
+        is 'RATE'.
+
+    {extra_kwargs_doc}
+
+    Returns
+    -------
+    out: tuple
+        A three-element tuple containing the DWD precipitation product with the
+        requested quantity and the associated quality field and metadata. The
+        quality field is read from the file if it contains a dataset whose
+        quantity identifier is 'QIND'.
+    """
+    if not H5PY_IMPORTED:
+        raise MissingOptionalDependency(
+            "h5py package is required to import "
+            "radar reflectivity composites using ODIM HDF5 specification "
+            "but it is not installed"
+        )
+
+    if qty not in ["ACRR", "DBZH", "RATE"]:
+        raise ValueError(
+            "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
+            % qty
+        )
+
+    f = h5py.File(filename, "r")
+
+    precip = None
+    quality = None
+
+    for dsg in f.items():
+        if dsg[0].startswith("dataset"):
+            what_grp_found = False
+            # check if the "what" group is in the "dataset" group
+            if "what" in list(dsg[1].keys()):
+                if "quantity" in dsg[1]["what"].attrs.keys():
+                    try:
+                        qty_, gain, offset, nodata, undetect = (
+                            _read_opera_hdf5_what_group(dsg[1]["what"])
+                        )
+                        what_grp_found = True
+                    except KeyError:
+                        pass
+
+            for dg in dsg[1].items():
+                if dg[0][0:4] == "data":
+                    # check if the "what" group is in the "data" group
+                    if "what" in list(dg[1].keys()):
+                        (
+                            qty_,
+                            gain,
+                            offset,
+                            nodata,
+                            undetect,
+                        ) = _read_opera_hdf5_what_group(dg[1]["what"])
+                    elif not what_grp_found:
+                        raise DataModelError(
+                            "Non ODIM compliant file: "
+                            "no what group found from {} "
+                            "or its subgroups".format(dg[0])
+                        )
+
+                    if qty_.decode() in [qty, "QIND"]:
+                        arr = dg[1]["data"][...]
+                        mask_n = arr == nodata
+                        mask_u = arr == undetect
+                        mask = np.logical_and(~mask_u, ~mask_n)
+
+                        if qty_.decode() == qty:
+                            precip = np.empty(arr.shape)
+                            precip[mask] = arr[mask] * gain + offset
+                            if qty != "DBZH":
+                                precip[mask_u] = offset
+                            else:
+                                precip[mask_u] = -30.0
+                            precip[mask_n] = np.nan
+                        elif qty_.decode() == "QIND":
+                            quality = np.empty(arr.shape, dtype=float)
+                            quality[mask] = arr[mask]
+                            quality[~mask] = np.nan
+
+    if precip is None:
+        raise IOError("requested quantity %s not found" % qty)
+
+    where = f["where"]
+    if isinstance(where.attrs["projdef"], str):
+        proj4str = where.attrs["projdef"]
+    else:
+        proj4str = where.attrs["projdef"].decode()
+    pr = pyproj.Proj(proj4str)
+
+    ll_lat = where.attrs["LL_lat"]
+    ll_lon = where.attrs["LL_lon"]
+    ur_lat = where.attrs["UR_lat"]
+    ur_lon = where.attrs["UR_lon"]
+    if (
+        "LR_lat" in where.attrs.keys()
+        and "LR_lon" in where.attrs.keys()
+        and "UL_lat" in where.attrs.keys()
+        and "UL_lon" in where.attrs.keys()
+    ):
+        lr_lat = float(where.attrs["LR_lat"])
+        lr_lon = float(where.attrs["LR_lon"])
+        ul_lat = float(where.attrs["UL_lat"])
+        ul_lon = float(where.attrs["UL_lon"])
+        full_cornerpts = True
+    else:
+        full_cornerpts = False
+
+    ll_x, ll_y = pr(ll_lon, ll_lat)
+    ur_x, ur_y = pr(ur_lon, ur_lat)
+
+    if full_cornerpts:
+        lr_x, lr_y = pr(lr_lon, lr_lat)
+        ul_x, ul_y = pr(ul_lon, ul_lat)
+        x1 = min(ll_x, ul_x)
+        y1 = min(ll_y, lr_y)
+        x2 = max(lr_x, ur_x)
+        y2 = max(ul_y, ur_y)
+    else:
+        x1 = ll_x
+        y1 = ll_y
+        x2 = ur_x
+        y2 = ur_y
+
+    dataset1 = f["dataset1"]
+
+    if "xscale" in where.attrs.keys() and "yscale" in where.attrs.keys():
+        xpixelsize = where.attrs["xscale"]
+        ypixelsize = where.attrs["yscale"]
+    elif (
+        "xscale" in dataset1["where"].attrs.keys()
+        and "yscale" in dataset1["where"].attrs.keys()
+    ):
+        where = dataset1["where"]
+        xpixelsize = where.attrs["xscale"]
+        ypixelsize = where.attrs["yscale"]
+    else:
+        xpixelsize = None
+        ypixelsize = None
+
+    if qty == "ACRR":
+        unit = "mm"
+        transform = None
+    elif qty == "DBZH":
+        unit = "dBZ"
+        transform = "dB"
+    else:
+        unit = "mm/h"
+        transform = None
+
+    metadata = {
+        "projection": proj4str,
+        "ll_lon": ll_lon,
+        "ll_lat": ll_lat,
+        "ur_lon": ur_lon,
+        "ur_lat": ur_lat,
+        "x1": x1,
+        "y1": y1,
+        "x2": x2,
+        "y2": y2,
+        "xpixelsize": xpixelsize,
+        "ypixelsize": ypixelsize,
+        "cartesian_unit": "m",
+        "yorigin": "lower",
+        "institution": "Deutscher Wetterdienst",
+        "accutime": None,
+        "unit": unit,
+        "transform": transform,
+        "zerovalue": np.nanmin(precip),
+        "threshold": _get_threshold_value(precip),
+    }
+
+    metadata.update(kwargs)
+
+    f.close()
+
+    return precip, quality, metadata
+
+
+@postprocess_import()
+def import_dwd_radolan(filename, product):
+    """
+    Import a RADOLAN precipitation product from a binary file.
+
+    Parameters
+    ----------
+    filename: str
+        Name of the file to import.
+    product: {'WX','RX','EX','RY','RW','AY','RS','YW','WN'}
+        The specific product to read from the file. Please see
+        https://www.dwd.de/DE/leistungen/radolan/radolan_info/
+        radolan_radvor_op_komposit_format_pdf.pdf
+        for a detailed description.
+
+    {extra_kwargs_doc}
+
+    Returns
+    -------
+    out: tuple
+        A three-element tuple containing the precipitation field imported
+        from a RADOLAN binary file and the associated quality field and metadata.
+        The quality field is currently set to None.
+ """ + size_file = os.path.getsize(filename) + size_data = np.round(size_file, -3) + size_header = size_file - size_data + + f = open(filename, "rb") + header = f.read(size_header).decode("utf-8") + + prod = header[:2] + + assert prod == product, "Product not in File!" + + prod_cat1 = np.array(["WX", "RX", "EX"]) + prod_cat2 = np.array(["RY", "RW", "AY", "RS", "YW"]) + + nbyte = 1 if prod in prod_cat1 else 2 + signed = "B" if prod in prod_cat1 else "H" + + fac = int(header.split("E-")[1].split("INT")[0]) + dimsplit = header.split("x") + dims = np.array((dimsplit[0][-4:], dimsplit[1][:4]), dtype=int)[::-1] + + data = array.array(signed) + data.fromfile(f, size_data // nbyte) + + f.close() + + data = np.array(np.reshape(data, dims, order="F"), dtype=float).T + + if prod == "SF": + no_echo_value = 0.0 + elif prod in prod_cat2: + no_echo_value = -0.01 + else: + no_echo_value = -32.5 + + if prod in prod_cat1: + data[data >= 249] = np.nan + data = data / 2.0 + no_echo_value + elif prod in prod_cat2: + data, no_data_mask = _identify_info_bits(data) + if prod == "AY": + data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + else: + data = (10 ** (fac * -1)) * data + no_echo_value + else: + data, no_data_mask = _identify_info_bits(data) + data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + + data[no_data_mask] = np.nan + + geodata = _import_dwd_geodata(product, dims) + metadata = geodata + + return data, None, metadata + + +def _identify_info_bits(data): + + # clutter + clutter_mask = data - 2**15 >= 0.0 + data[clutter_mask] = 0 + + # negative values + mask = data - 2**14 >= 0.0 + data[mask] -= 2**14 + + # no data + no_data_mask = data - 2**13 == 2500.0 + + if np.sum(no_data_mask) == 0.0: + + no_data_mask = data - 2**13 == 0.0 + + data[no_data_mask] = 0.0 + + # secondary data + data[data - 2**12 > 0.0] -= 2**12 + + data[mask] *= -1 + + return data, no_data_mask + + +def _import_dwd_geodata(product, dims): + """ + Geodata for RADOLAN products. Hard-coded here, cause there is only basic + information about the projection in the header of the binary RADOLAN + files. 
+ """ + + geodata = {} + + projdef = "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 +lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0" + geodata["projection"] = projdef + + geodata["xpixelsize"] = 1000.0 + geodata["ypixelsize"] = 1000.0 + geodata["cartesian_unit"] = "m" + geodata["yorigin"] = "lower" + + prod_cat1 = ["RY", "RW", "RX"] # 900x900 + prod_cat2 = ["WN"] # 1100x1200 + prod_cat3 = ["YW"] # 900x1100 + + if product in prod_cat1: # lower left + lon = 3.604382995 + lat = 46.95361536 + if product in prod_cat2: # lower left + lon = 3.566994635 + lat = 45.69642538 + if product in prod_cat3: # center + lon = 9.0 + lat = 51.0 + + pr = pyproj.Proj(projdef) + x1, y1 = pr(lon, lat) + if product in prod_cat3: + x1 -= dims[0] * 1000 // 2 + y1 -= dims[1] * 1000 // 2 - 80000 + + x2 = x1 + dims[0] * 1000 + y2 = y1 + dims[1] * 1000 + + geodata["x1"] = x1 + geodata["y1"] = y1 + geodata["x2"] = x2 + geodata["y2"] = y2 + + return geodata From 1e98e4f6b4d2e25df66c4a94dc4e5cc2ed6828ea Mon Sep 17 00:00:00 2001 From: m-rempel Date: Tue, 3 Jun 2025 08:53:29 +0000 Subject: [PATCH 2/4] Update DWD HDF5 Importer --- pysteps/io/importers.py | 225 ++++++++++++++++++++++------------------ 1 file changed, 126 insertions(+), 99 deletions(-) diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py index 4a37f555..52ebe66a 100644 --- a/pysteps/io/importers.py +++ b/pysteps/io/importers.py @@ -82,11 +82,14 @@ import_odim_hdf5 import_opera_hdf5 import_saf_crri + import_dwd_hdf5 + import_dwd_radolan """ import gzip import os import array +import datetime from functools import partial import numpy as np @@ -1663,91 +1666,46 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs): precip = None quality = None - for dsg in f.items(): - if dsg[0].startswith("dataset"): - what_grp_found = False - # check if the "what" group is in the "dataset" group - if "what" in list(dsg[1].keys()): - if "quantity" in dsg[1]["what"].attrs.keys(): - try: - qty_, gain, offset, nodata, undetect = ( - _read_opera_hdf5_what_group(dsg[1]["what"]) - ) - what_grp_found = True - except KeyError: - pass + file_content = {} + _read_hdf5_cont(f, file_content) + f.close() - for dg in dsg[1].items(): - if dg[0][0:4] == "data": - # check if the "what" group is in the "data" group - if "what" in list(dg[1].keys()): - ( - qty_, - gain, - offset, - nodata, - undetect, - ) = _read_opera_hdf5_what_group(dg[1]["what"]) - elif not what_grp_found: - raise DataModelError( - "Non ODIM compliant file: " - "no what group found from {} " - "or its subgroups".format(dg[0]) - ) + data_prop = {} + _get_whatgrp(file_content, data_prop) - if qty_.decode() in [qty, "QIND"]: - arr = dg[1]["data"][...] 
-                        mask_n = arr == nodata
-                        mask_u = arr == undetect
-                        mask = np.logical_and(~mask_u, ~mask_n)
+    arr = file_content["dataset1"]["data1"]["data"]
+    mask_n = arr == data_prop["nodata"]
+    mask_u = arr == data_prop["undetect"]
+    mask = np.logical_and(~mask_u, ~mask_n)
 
-                        if qty_.decode() == qty:
-                            precip = np.empty(arr.shape)
-                            precip[mask] = arr[mask] * gain + offset
-                            if qty != "DBZH":
-                                precip[mask_u] = offset
-                            else:
-                                precip[mask_u] = -30.0
-                            precip[mask_n] = np.nan
-                        elif qty_.decode() == "QIND":
-                            quality = np.empty(arr.shape, dtype=float)
-                            quality[mask] = arr[mask]
-                            quality[~mask] = np.nan
+    if data_prop["quantity"] == qty:
+        precip = np.empty(arr.shape)
+        precip[mask] = arr[mask] * data_prop["gain"] + data_prop["offset"]
+        if qty != "DBZH":
+            precip[mask_u] = data_prop["offset"]
+        else:
+            precip[mask_u] = -32.5
+        precip[mask_n] = np.nan
+    elif data_prop["quantity"] == "QIND":
+        quality = np.empty(arr.shape, dtype=float)
+        quality[mask] = arr[mask]
+        quality[~mask] = np.nan
 
     if precip is None:
         raise IOError("requested quantity %s not found" % qty)
 
-    where = f["where"]
-    if isinstance(where.attrs["projdef"], str):
-        proj4str = where.attrs["projdef"]
-    else:
-        proj4str = where.attrs["projdef"].decode()
-    pr = pyproj.Proj(proj4str)
+    pr = pyproj.Proj(file_content["where"]["projdef"])
 
-    ll_lat = where.attrs["LL_lat"]
-    ll_lon = where.attrs["LL_lon"]
-    ur_lat = where.attrs["UR_lat"]
-    ur_lon = where.attrs["UR_lon"]
-    if (
-        "LR_lat" in where.attrs.keys()
-        and "LR_lon" in where.attrs.keys()
-        and "UL_lat" in where.attrs.keys()
-        and "UL_lon" in where.attrs.keys()
-    ):
-        lr_lat = float(where.attrs["LR_lat"])
-        lr_lon = float(where.attrs["LR_lon"])
-        ul_lat = float(where.attrs["UL_lat"])
-        ul_lon = float(where.attrs["UL_lon"])
-        full_cornerpts = True
-    else:
-        full_cornerpts = False
-
-    ll_x, ll_y = pr(ll_lon, ll_lat)
-    ur_x, ur_y = pr(ur_lon, ur_lat)
+    ll_x, ll_y = pr(file_content["where"]["LL_lon"], file_content["where"]["LL_lat"])
+    ur_x, ur_y = pr(file_content["where"]["UR_lon"], file_content["where"]["UR_lat"])
 
-    if full_cornerpts:
-        lr_x, lr_y = pr(lr_lon, lr_lat)
-        ul_x, ul_y = pr(ul_lon, ul_lat)
+    if len([k for k in file_content["where"].keys() if "_lat" in k]) == 4:
+        lr_x, lr_y = pr(
+            file_content["where"]["LR_lon"], file_content["where"]["LR_lat"]
+        )
+        ul_x, ul_y = pr(
+            file_content["where"]["UL_lon"], file_content["where"]["UL_lat"]
+        )
         x1 = min(ll_x, ul_x)
         y1 = min(ll_y, lr_y)
         x2 = max(lr_x, ur_x)
@@ -1758,18 +1716,15 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         x2 = ur_x
         y2 = ur_y
 
-    dataset1 = f["dataset1"]
-
-    if "xscale" in where.attrs.keys() and "yscale" in where.attrs.keys():
-        xpixelsize = where.attrs["xscale"]
-        ypixelsize = where.attrs["yscale"]
-    elif (
-        "xscale" in dataset1["where"].attrs.keys()
-        and "yscale" in dataset1["where"].attrs.keys()
+    if (
+        "where" in file_content["dataset1"].keys()
+        and "xscale" in file_content["dataset1"]["where"].keys()
     ):
-        where = dataset1["where"]
-        xpixelsize = where.attrs["xscale"]
-        ypixelsize = where.attrs["yscale"]
+        xpixelsize = file_content["dataset1"]["where"]["xscale"]
+        ypixelsize = file_content["dataset1"]["where"]["yscale"]
+    elif "xscale" in file_content["where"].keys():
+        xpixelsize = file_content["where"]["xscale"]
+        ypixelsize = file_content["where"]["yscale"]
    else:
         xpixelsize = None
         ypixelsize = None
@@ -1784,12 +1739,22 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         unit = "mm/h"
         transform = None
 
+    startdate = datetime.datetime.strptime(
f"{file_content['dataset1']['what']['startdate']}{file_content['dataset1']['what']['starttime']}", + "%Y%m%d%H%M%S", + ) + enddate = datetime.datetime.strptime( + f"{file_content['dataset1']['what']['enddate']}{file_content['dataset1']['what']['endtime']}", + "%Y%m%d%H%M%S", + ) + accutime = (enddate - startdate).total_seconds() / 60.0 + metadata = { - "projection": proj4str, - "ll_lon": ll_lon, - "ll_lat": ll_lat, - "ur_lon": ur_lon, - "ur_lat": ur_lat, + "projection": file_content["where"]["projdef"], + "ll_lon": file_content["where"]["LL_lon"], + "ll_lat": file_content["where"]["LL_lat"], + "ur_lon": file_content["where"]["UR_lon"], + "ur_lat": file_content["where"]["UR_lat"], "x1": x1, "y1": y1, "x2": x2, @@ -1798,8 +1763,8 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs): "ypixelsize": ypixelsize, "cartesian_unit": "m", "yorigin": "lower", - "institution": "Deutscher Wetterdienst", - "accutime": None, + "institution": file_content["what"]["source"], + "accutime": accutime, "unit": unit, "transform": transform, "zerovalue": np.nanmin(precip), @@ -1813,6 +1778,68 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs): return precip, quality, metadata +def _read_hdf5_cont(f, d): + """ + Recursively read nested dictionaries from a HDF5 file. + """ + + # set simple types of hdf content + group_type = h5py._hl.group.Group + + for key, value in f.items(): + + if isinstance(value, group_type): + + d[key] = {} + + if len(list(value.items())) > 0: + + _read_hdf5_cont(value, d[key]) + + else: + + d[key] = {attr: value.attrs[attr] for attr in value.attrs} + d[key] = { + k: v.decode() if type(v) == np.bytes_ else v + for k, v in d[key].items() + } + + else: + + try: + + d[key] = np.array(value) + + except TypeError: + + d[key] = np.array(value.astype(float)) + + except: + + d[key] = value.value + + return + + +def _get_whatgrp(d, g): + """ + Recursively get the what group containing the data field properties. + """ + if "what" in d.keys(): + if "gain" in d["what"].keys(): + g.update(d["what"]) + else: + k = [k for k in d.keys() if "data" in k][0] + _get_whatgrp(d[k], g) + else: + raise DataModelError( + "Non ODIM compliant file: " + "no what group found from {} " + "or its subgroups".format(d.keys()[0]) + ) + return + + @postprocess_import() def import_dwd_radolan(filename, product): """ @@ -1848,8 +1875,8 @@ def import_dwd_radolan(filename, product): assert prod == product, "Product not in File!" 
From 9d47508642153749ca47b8b16d7f13ea3e19f97d Mon Sep 17 00:00:00 2001
From: m-rempel
Date: Tue, 24 Jun 2025 14:32:09 +0000
Subject: [PATCH 3/4] Add reprojection of unstructured grids

---
 pysteps/io/importers.py       |  17 +++--
 pysteps/utils/reprojection.py | 123 ++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index 52ebe66a..a9478354 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -1656,6 +1656,13 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
             "but it is not installed"
         )
 
+    if not PYPROJ_IMPORTED:
+        raise MissingOptionalDependency(
+            "pyproj package is required to import "
+            "DWD's radar reflectivity composite "
+            "but it is not installed"
+        )
+
     if qty not in ["ACRR", "DBZH", "RATE"]:
         raise ValueError(
             "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
             % qty
         )
@@ -1740,11 +1747,13 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     startdate = datetime.datetime.strptime(
-        f"{file_content['dataset1']['what']['startdate']}{file_content['dataset1']['what']['starttime']}",
+        file_content["dataset1"]["what"]["startdate"]
+        + file_content["dataset1"]["what"]["starttime"],
         "%Y%m%d%H%M%S",
     )
     enddate = datetime.datetime.strptime(
-        f"{file_content['dataset1']['what']['enddate']}{file_content['dataset1']['what']['endtime']}",
+        file_content["dataset1"]["what"]["enddate"]
+        + file_content["dataset1"]["what"]["endtime"],
         "%Y%m%d%H%M%S",
     )
     accutime = (enddate - startdate).total_seconds() / 60.0
@@ -1762,7 +1771,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         "xpixelsize": xpixelsize,
         "ypixelsize": ypixelsize,
         "cartesian_unit": "m",
-        "yorigin": "lower",
+        "yorigin": "upper",
         "institution": file_content["what"]["source"],
         "accutime": accutime,
         "unit": unit,
@@ -1962,7 +1971,7 @@ def _import_dwd_geodata(product, dims):
     geodata["xpixelsize"] = 1000.0
     geodata["ypixelsize"] = 1000.0
     geodata["cartesian_unit"] = "m"
-    geodata["yorigin"] = "lower"
+    geodata["yorigin"] = "upper"
 
     prod_cat1 = ["RX", "RY", "RW"]  # 900x900
     prod_cat2 = ["WN"]  # 1200x1100
diff --git a/pysteps/utils/reprojection.py b/pysteps/utils/reprojection.py
index 6144a52c..0123776e 100644
--- a/pysteps/utils/reprojection.py
+++ b/pysteps/utils/reprojection.py
@@ -12,8 +12,10 @@
     reproject_grids
 """
 from pysteps.exceptions import MissingOptionalDependency
+from scipy.interpolate import griddata
 
 import numpy as np
+import xarray as xr
 
 try:
     from rasterio import Affine as A
@@ -23,6 +25,13 @@
 except ImportError:
     RASTERIO_IMPORTED = False
 
+try:
+    import pyproj
+
+    PYPROJ_IMPORTED = True
+except ImportError:
+    PYPROJ_IMPORTED = False
+
 
 def reproject_grids(src_array, dst_array, metadata_src, metadata_dst):
     """
@@ -118,3 +127,117 @@ def reproject_grids(src_array, dst_array, metadata_src, metadata_dst):
             metadata[key] = metadata_dst[key]
     return r_rprj, metadata
+
+
+def unstructured2regular(src_array, metadata_src, metadata_dst):
+    """
+    Reproject unstructured data onto a regular grid.
+
+    Parameters
+    ----------
+    src_array: xarray.DataArray
+        DataArray of shape (t, n_ens, ngridcells) containing a time
+        series of precipitation ensemble forecasts. These precipitation fields
+        will be reprojected.
+    metadata_src: dict
+        Metadata dictionary containing the projection, clon, clat, and ngridcells
+        attributes of the src_array as described in the documentation of
+        :py:mod:`pysteps.io.importers`.
+    metadata_dst: dict
+        Metadata dictionary containing the projection, x- and ypixelsize, x1, x2,
+        y1, y2, yorigin and cartesian_unit attributes of the dst_array.
+
+    Returns
+    -------
+    xr_rprj: xarray.DataArray
+        Four-dimensional array of shape (t, n_ens, y, x) containing the
+        precipitation fields of src_array, but reprojected to the domain
+        of dst_array.
+    metadata: dict
+        Metadata dictionary as metadata_src, but with the projection, pixel
+        sizes, corner coordinates, yorigin and cartesian_unit taken from
+        metadata_dst.
+    """
+
+    if not PYPROJ_IMPORTED:
+        raise MissingOptionalDependency(
+            "pyproj package is required to reproject DWD's NWP data "
+            "but it is not installed"
+        )
+
+    # Get number of grid cells
+    Nc = src_array["longitude"].shape[0]
+    ic_in = np.arange(Nc)
+
+    # Get cartesian coordinates of destination grid
+    x_dst = np.arange(
+        np.float32(metadata_dst["x1"]),
+        np.float32(metadata_dst["x2"]),
+        metadata_dst["xpixelsize"],
+    )
+
+    y_dst = np.arange(
+        np.float32(metadata_dst["y1"]),
+        np.float32(metadata_dst["y2"]),
+        metadata_dst["ypixelsize"],
+    )
+
+    if metadata_dst["yorigin"] == "upper":
+        y_dst = y_dst[::-1]
+
+    xx_dst, yy_dst = np.meshgrid(x_dst, y_dst)
+    s_out = yy_dst.shape
+    P_out = np.array((xx_dst.flatten(), yy_dst.flatten())).T
+
+    # Extract the grid info from src_array, assuming that src and dst share
+    # the same projection
+    pr = pyproj.Proj(metadata_dst["projection"])
+    x_src, y_src = pr(src_array["longitude"].values, src_array["latitude"].values)
+    P_in = np.stack((x_src, y_src)).T
+
+    ic_out = (
+        griddata(P_in, ic_in.flatten(), P_out, method="nearest")
+        .reshape(s_out)
+        .astype(int)
+    )
+
+    data_rprj = np.array(
+        [
+            [src_array[i, j].values[ic_out] for j in range(src_array.shape[1])]
+            for i in range(src_array.shape[0])
+        ]
+    )
+    dims = ["time", "ens_no", "south_north", "west_east"]
+    lon, lat = pr(xx_dst, yy_dst, inverse=True)
+    coords = {
+        "time": src_array["time"],
+        "ens_no": src_array["ens_no"],
+        "west_east": np.arange(0, len(x_dst), 1),
+        "south_north": np.arange(0, len(y_dst), 1),
+        "x": ("west_east", np.arange(0, len(x_dst), 1), {"units": "1"}),
+        "y": ("south_north", np.arange(0, len(y_dst), 1), {"units": "1"}),
+        "projection_x_coordinate": ("west_east", x_dst, {"units": "m"}),
+        "projection_y_coordinate": ("south_north", y_dst, {"units": "m"}),
+        "longitude": (
+            ["south_north", "west_east"],
+            lon,
+            {"units": "degrees_east"},
+        ),
+        "latitude": (["south_north", "west_east"], lat, {"units": "degrees_north"}),
+    }
+    xr_rprj = xr.DataArray(data=data_rprj, dims=dims, coords=coords)
+
+    # Update the metadata
+    metadata = metadata_src.copy()
+
+    for key in [
+        "projection",
+        "yorigin",
+        "xpixelsize",
+        "ypixelsize",
+        "x1",
+        "x2",
+        "y1",
+        "y2",
+        "cartesian_unit",
+    ]:
+        metadata[key] = metadata_dst[key]
+
+    return xr_rprj, metadata
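A minimal sketch of how the new unstructured2regular is meant to be called, on synthetic unstructured input (every coordinate, size and target-grid value below is an illustrative assumption, not taken from real DWD files):

    import numpy as np
    import xarray as xr
    from pysteps.utils.reprojection import unstructured2regular

    n_cells = 500
    rng = np.random.default_rng(42)
    src = xr.DataArray(
        rng.random((2, 3, n_cells)),  # (time, ens_no, grid cell)
        dims=("time", "ens_no", "cell"),
        coords={
            "time": [0, 1],
            "ens_no": [0, 1, 2],
            "longitude": ("cell", rng.uniform(6.0, 12.0, n_cells)),
            "latitude": ("cell", rng.uniform(48.0, 53.0, n_cells)),
        },
    )
    metadata_dst = {
        "projection": "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 "
        "+lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0",
        "x1": -200000.0, "y1": -4600000.0, "x2": 200000.0, "y2": -4200000.0,
        "xpixelsize": 10000.0, "ypixelsize": 10000.0,
        "yorigin": "upper", "cartesian_unit": "m",
    }
    field, meta = unstructured2regular(src, {}, metadata_dst)
    print(field.shape)  # (2, 3, 40, 40), i.e. (time, ens_no, y, x)

The nearest-neighbour lookup via scipy's griddata assigns each destination pixel the value of the closest source cell, so the sketch runs even when the synthetic points only sparsely cover the target grid.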
From f4b150666801165dcf5069218cbcbf45dbfd909d Mon Sep 17 00:00:00 2001
From: Ruben Imhoff
Date: Tue, 1 Jul 2025 16:30:54 +0200
Subject: [PATCH 4/4] docs: add docstrings to function and run black

---
 pysteps/io/importers.py | 151 ++++++++++++++++++++++++++++------------
 1 file changed, 106 insertions(+), 45 deletions(-)

diff --git a/pysteps/io/importers.py b/pysteps/io/importers.py
index a9478354..42d1be9c 100644
--- a/pysteps/io/importers.py
+++ b/pysteps/io/importers.py
@@ -1668,8 +1668,8 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
             "unknown quantity %s: the available options are 'ACRR', 'DBZH' and 'RATE'"
             % qty
         )
 
+    # Read the data
     f = h5py.File(filename, "r")
-
     precip = None
     quality = None
 
@@ -1685,6 +1685,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     mask_u = arr == data_prop["undetect"]
     mask = np.logical_and(~mask_u, ~mask_n)
 
+    # Read the precipitation variable and quantity from the data
     if data_prop["quantity"] == qty:
         precip = np.empty(arr.shape)
         precip[mask] = arr[mask] * data_prop["gain"] + data_prop["offset"]
@@ -1701,8 +1702,8 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     if precip is None:
         raise IOError("requested quantity %s not found" % qty)
 
+    # Get the projection and grid information from the HDF5 file
     pr = pyproj.Proj(file_content["where"]["projdef"])
-
     ll_x, ll_y = pr(file_content["where"]["LL_lon"], file_content["where"]["LL_lat"])
     ur_x, ur_y = pr(file_content["where"]["UR_lon"], file_content["where"]["UR_lat"])
 
@@ -1723,6 +1724,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         x2 = ur_x
         y2 = ur_y
 
+    # Get the grid cell size
     if (
         "where" in file_content["dataset1"].keys()
         and "xscale" in file_content["dataset1"]["where"].keys()
@@ -1736,6 +1738,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         xpixelsize = None
         ypixelsize = None
 
+    # Get the unit and transform
     if qty == "ACRR":
         unit = "mm"
         transform = None
@@ -1746,6 +1749,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
         unit = "mm/h"
         transform = None
 
+    # Extract the time step
     startdate = datetime.datetime.strptime(
         file_content["dataset1"]["what"]["startdate"]
         + file_content["dataset1"]["what"]["starttime"],
@@ -1758,6 +1762,7 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
     )
     accutime = (enddate - startdate).total_seconds() / 60.0
 
+    # Finally, fill out the metadata
     metadata = {
         "projection": file_content["where"]["projdef"],
         "ll_lon": file_content["where"]["LL_lon"],
@@ -1790,23 +1795,30 @@ def import_dwd_hdf5(filename, qty="RATE", **kwargs):
 def _read_hdf5_cont(f, d):
     """
     Recursively read nested dictionaries from an HDF5 file.
-    """
+
+    Parameters
+    ----------
+    f : h5py.Group or h5py.File
+        The current group or file object from which to read data.
+    d : dict
+        The dictionary to populate with the contents of the HDF5 group.
+
+    Returns
+    -------
+    None.
+    """
-
     # set simple types of hdf content
     group_type = h5py._hl.group.Group
 
     for key, value in f.items():
-
         if isinstance(value, group_type):
-
             d[key] = {}
-
             if len(list(value.items())) > 0:
-
+                # Recurse into non-empty group
                 _read_hdf5_cont(value, d[key])
-
             else:
-
+                # Handle empty group with attributes
                 d[key] = {attr: value.attrs[attr] for attr in value.attrs}
                 d[key] = {
                     k: v.decode() if type(v) == np.bytes_ else v
@@ -1814,17 +1826,11 @@ def _read_hdf5_cont(f, d):
                 }
 
         else:
-
             try:
-
                 d[key] = np.array(value)
-
             except TypeError:
-
                 d[key] = np.array(value.astype(float))
-
             except:
-
                 d[key] = value.value
 
     return
@@ -1873,34 +1879,41 @@ def import_dwd_radolan(filename, product):
         from a RADOLAN binary file and the associated quality field and metadata.
         The quality field is currently set to None.
""" + # Determine file size and header size size_file = os.path.getsize(filename) size_data = np.round(size_file, -3) size_header = size_file - size_data + # Read the file f = open(filename, "rb") header = f.read(size_header).decode("utf-8") + # Check if the product code is the same as the provided product variable prod = header[:2] - assert prod == product, "Product not in File!" + # Define product categories prod_cat1 = np.array(["WX", "RX"]) prod_cat2 = np.array(["RY", "RW", "YW"]) + # Determine byte size and data type nbyte = 1 if prod in prod_cat1 else 2 signed = "B" if prod in prod_cat1 else "H" + # Extract the scaling factor and grid dimensions fac = int(header.split("E-")[1].split("INT")[0]) dimsplit = header.split("x") dims = np.array((dimsplit[0][-4:], dimsplit[1][:4]), dtype=int)[::-1] + # Read binary data data = array.array(signed) data.fromfile(f, size_data // nbyte) - f.close() + # Reshape and transpose data to match grid layout data = np.array(np.reshape(data, dims, order="F"), dtype=float).T + # Define no-echo values based on product type if prod == "SF": no_echo_value = 0.0 elif prod in prod_cat2: @@ -1908,21 +1921,24 @@ def import_dwd_radolan(filename, product): else: no_echo_value = -32.5 + # Apply scaling and handle missing data if prod in prod_cat1: data[data >= 249] = np.nan data = data / 2.0 + no_echo_value elif prod in prod_cat2: data, no_data_mask = _identify_info_bits(data) if prod == "AY": - data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + data = (10 ** (-fac)) * data / 2.0 + no_echo_value else: - data = (10 ** (fac * -1)) * data + no_echo_value + data = (10 ** (-fac)) * data + no_echo_value else: data, no_data_mask = _identify_info_bits(data) - data = (10 ** (fac * -1)) * data / 2.0 + no_echo_value + data = (10 ** (-fac)) * data / 2.0 + no_echo_value + # Mask out no-data values data[no_data_mask] = np.nan + # Load geospatial metadata geodata = _import_dwd_geodata(product, dims) metadata = geodata @@ -1930,27 +1946,47 @@ def import_dwd_radolan(filename, product): def _identify_info_bits(data): + """ + Identifies and processes information bits embedded in RADOLAN data values. - # clutter + This function decodes metadata flags embedded in the RADOLAN data array, such as: + - Clutter (bit 15) + - Negative values (bit 14) + - No data (bit 13) + - Secondary data (bit 12) + + Parameters + ---------- + data : np.ndarray + The raw RADOLAN data array containing encoded information bits. + + Returns + ------- + tuple + A tuple containing: + - data : np.ndarray + The cleaned and decoded data array. + - no_data_mask : np.ndarray + A boolean mask indicating positions of no-data values. + """ + # Identify and remove clutter (bit 15) clutter_mask = data - 2**15 >= 0.0 data[clutter_mask] = 0 - # negative values + # Identify and convert negative values (bit 14) mask = data - 2**14 >= 0.0 data[mask] -= 2**14 - # no data + # Identify no-data values (bit 13) no_data_mask = data - 2**13 == 2500.0 - if np.sum(no_data_mask) == 0.0: - no_data_mask = data - 2**13 == 0.0 - data[no_data_mask] = 0.0 - # secondary data + # Identify and remove secondary data flag (bit 12) data[data - 2**12 > 0.0] -= 2**12 + # Apply negative sign to previously marked negative values data[mask] *= -1 return data, no_data_mask @@ -1958,41 +1994,66 @@ def _identify_info_bits(data): def _import_dwd_geodata(product, dims): """ - Geodata for RADOLAN products. Hard-coded here, cause there is only basic - information about the projection in the header of the binary RADOLAN - files. 
- """ + Generate geospatial metadata for RADOLAN precipitation products. + Since RADOLAN binary files contain only limited projection metadata, + this function provides hard-coded geospatial definitions and calculates + the bounding box of the data grid based on the product type and dimensions. + + Parameters + ---------- + product : str + The RADOLAN product code (e.g., 'RX', 'WX', 'WN', etc.). + dims : tuple of int + The dimensions of the data grid (rows, columns). + + Returns + ------- + geodata : dict + A dictionary containing: + - 'projection': PROJ.4 string defining the stereographic projection. + - 'xpixelsize', 'ypixelsize': Pixel size in meters. + - 'cartesian_unit': Unit of the coordinate system (meters). + - 'yorigin': Origin of the y-axis ('upper'). + - 'x1', 'y1': Coordinates of the lower-left corner. + - 'x2', 'y2': Coordinates of the upper-right corner. + """ geodata = {} - projdef = "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 +lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0" + # Define stereographic projection used by RADOLAN + projdef = ( + "+a=6378137.0 +b=6356752.0 +proj=stere +lat_ts=60.0 " + "+lat_0=90.0 +lon_0=10.0 +x_0=0 +y_0=0" + ) geodata["projection"] = projdef - geodata["xpixelsize"] = 1000.0 geodata["ypixelsize"] = 1000.0 geodata["cartesian_unit"] = "m" geodata["yorigin"] = "upper" - prod_cat1 = ["RX", "RY", "RW"] # 900x900 - prod_cat2 = ["WN"] # 1200x1100 - prod_cat3 = ["WX", "YW"] # 1100x900 + # Define product categories and their reference points + prod_cat1 = ["RX", "RY", "RW"] + prod_cat2 = ["WN"] + prod_cat3 = ["WX", "YW"] - if product in prod_cat1: # lower left - lon = 3.604382995 - lat = 46.95361536 - if product in prod_cat2: # lower left - lon = 3.566994635 - lat = 45.69642538 - if product in prod_cat3: # center - lon = 9.0 - lat = 51.0 + # Assign reference coordinates based on product type + if product in prod_cat1: + lon, lat = 3.604382995, 46.95361536 + elif product in prod_cat2: + lon, lat = 3.566994635, 45.69642538 + elif product in prod_cat3: + lon, lat = 9.0, 51.0 + # Project reference coordinates to Cartesian system pr = pyproj.Proj(projdef) x1, y1 = pr(lon, lat) + + # Adjust origin for center-based products if product in prod_cat3: x1 -= dims[0] * 1000 // 2 y1 -= dims[1] * 1000 // 2 - 80000 + # Calculate bounding box x2 = x1 + dims[0] * 1000 y2 = y1 + dims[1] * 1000