Skip to content

Commit 5de0bcb

Browse files
committed
make dimension.py xarray compatible (#397)
* make dimension.py xarray compatible * convert final method in the dimension module * nanmin in stead of zerovalue in square domain method * make test steps skill run * undo accidental change * remove commented out code * The dataset can contain more than one dataarray * Address pull request comments * Add links to dataset documentation everywhere
1 parent a731ab1 commit 5de0bcb

File tree

8 files changed

+522
-540
lines changed

8 files changed

+522
-540
lines changed

pysteps/converters.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"""
1313

1414
import numpy as np
15+
import numpy.typing as npt
1516
import pyproj
1617
import xarray as xr
1718

@@ -67,6 +68,15 @@ def _convert_proj4_to_grid_mapping(proj4str):
6768
return grid_mapping_var_name, grid_mapping_name, params
6869

6970

71+
def compute_lat_lon(
72+
x_r: npt.ArrayLike, y_r: npt.ArrayLike, projection: str
73+
) -> tuple[npt.ArrayLike, npt.ArrayLike]:
74+
x_2d, y_2d = np.meshgrid(x_r, y_r)
75+
pr = pyproj.Proj(projection)
76+
lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)
77+
return lat.reshape(x_2d.shape), lon.reshape(x_2d.shape)
78+
79+
7080
def convert_to_xarray_dataset(
7181
precip: np.ndarray,
7282
quality: np.ndarray | None,
@@ -105,9 +115,7 @@ def convert_to_xarray_dataset(
105115
if metadata["yorigin"] == "upper":
106116
y_r = np.flip(y_r)
107117

108-
x_2d, y_2d = np.meshgrid(x_r, y_r)
109-
pr = pyproj.Proj(metadata["projection"])
110-
lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)
118+
lat, lon = compute_lat_lon(x_r, y_r, metadata["projection"])
111119

112120
(
113121
grid_mapping_var_name,
@@ -166,7 +174,7 @@ def convert_to_xarray_dataset(
166174
),
167175
"lon": (
168176
["y", "x"],
169-
lon.reshape(precip.shape),
177+
lon,
170178
{
171179
"long_name": "longitude coordinate",
172180
"standard_name": "longitude",
@@ -176,7 +184,7 @@ def convert_to_xarray_dataset(
176184
),
177185
"lat": (
178186
["y", "x"],
179-
lat.reshape(precip.shape),
187+
lat,
180188
{
181189
"long_name": "latitude coordinate",
182190
"standard_name": "latitude",

pysteps/io/importers.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,93 @@
6565
| zr_b | the Z-R exponent b in Z = a*R**b |
6666
+------------------+----------------------------------------------------------+
6767
68+
The data and metadata is then postprocessed into an xarray dataset. This dataset will
69+
always contain an x and y dimension, but can be extended with a time dimension and/or
70+
an ensemble member dimension over the course of the process.
71+
72+
The dataset can contain the following coordinate variables:
73+
74+
75+
.. tabularcolumns:: |p{2cm}|L|
76+
77+
+---------------+-------------------------------------------------------------------------------------------+
78+
| Coordinate | Description |
79+
+===============+===========================================================================================+
80+
| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
81+
+---------------+-------------------------------------------------------------------------------------------+
82+
| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
83+
+---------------+-------------------------------------------------------------------------------------------+
84+
| lat | latitude coordinate in degrees |
85+
+---------------+-------------------------------------------------------------------------------------------+
86+
| lon | longitude coordinate in degrees |
87+
+---------------+-------------------------------------------------------------------------------------------+
88+
| time | forecast time in seconds since forecast start time |
89+
+---------------+-------------------------------------------------------------------------------------------+
90+
| member | ensemble member number (integer) |
91+
+---------------+-------------------------------------------------------------------------------------------+
92+
93+
94+
The dataset can contain the following data variables:
95+
96+
.. tabularcolumns:: |p{2cm}|L|
97+
98+
+-------------------+-----------------------------------------------------------------------------------------------------------+
99+
| Variable | Description |
100+
+===================+===========================================================================================================+
101+
| precip_intensity, | precipitation data, based on the unit the data has it is stored in one of these 3 possible variables |
102+
| precip_accum | precip_intensity if unit is ``mm/h``, precip_accum if unit is ``mm`` and reflectivity if unit is ``dBZ``, |
103+
| or reflectivity | the attributes of this variable contain metadata relevant to this attribute (see below) |
104+
+-------------------+-----------------------------------------------------------------------------------------------------------+
105+
| quality | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything |
106+
+-------------------+-----------------------------------------------------------------------------------------------------------+
107+
108+
Some of the metadata in the metadata dictionary is not explicitely stored in the dataset,
109+
but is still implicitly present. For example ``x1`` can easily be found by taking the first
110+
value from the x coordinate variable. Metadata that is not implicitly present is explicitly
111+
stored either in the datasets global attributes or as attributes of the precipitation variable.
112+
Data that relates to the entire dataset is stored in the global attributes. The following data
113+
is stored in the global attributes:
114+
115+
.. tabularcolumns:: |p{2cm}|L|
116+
117+
+------------------+----------------------------------------------------------+
118+
| Key | Value |
119+
+==================+==========================================================+
120+
| projection | PROJ.4-compatible projection definition |
121+
+------------------+----------------------------------------------------------+
122+
| institution | name of the institution who provides the data |
123+
+------------------+----------------------------------------------------------+
124+
| precip_var | the name of the precipitation variable in this dataset |
125+
+------------------+----------------------------------------------------------+
126+
127+
The following data is stored as attributes of the precipitation variable:
128+
129+
.. tabularcolumns:: |p{2cm}|L|
130+
131+
+------------------+----------------------------------------------------------+
132+
| Key | Value |
133+
+==================+==========================================================+
134+
| units | the physical unit of the data: 'mm/h', 'mm' or 'dBZ' |
135+
+------------------+----------------------------------------------------------+
136+
| transform | the transformation of the data: None, 'dB', 'Box-Cox' or |
137+
| | others |
138+
+------------------+----------------------------------------------------------+
139+
| accutime | the accumulation time in minutes of the data, float |
140+
+------------------+----------------------------------------------------------+
141+
| threshold | the rain/no rain threshold with the same unit, |
142+
| | transformation and accutime of the data. |
143+
+------------------+----------------------------------------------------------+
144+
| zerovalue | the value assigned to the no rain pixels with the same |
145+
| | unit, transformation and accutime of the data. |
146+
+------------------+----------------------------------------------------------+
147+
| zr_a | the Z-R constant a in Z = a*R**b |
148+
+------------------+----------------------------------------------------------+
149+
| zr_b | the Z-R exponent b in Z = a*R**b |
150+
+------------------+----------------------------------------------------------+
151+
152+
Furthermore the dataset can contain some additional metadata to make the dataset
153+
CF-compliant.
154+
68155
Available Importers
69156
-------------------
70157

pysteps/tests/helpers.py

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import pysteps as stp
1515
from pysteps import io, rcparams
1616
from pysteps.utils import aggregate_fields_space
17+
from pysteps.utils.dimension import clip_domain
1718

1819
_reference_dates = dict()
1920
_reference_dates["bom"] = datetime(2018, 6, 16, 10, 0)
@@ -53,7 +54,6 @@ def get_precipitation_fields(
5354
num_prev_files=0,
5455
num_next_files=0,
5556
return_raw=False,
56-
metadata=False,
5757
upscale=None,
5858
source="mch",
5959
log_transform=True,
@@ -100,9 +100,6 @@ def get_precipitation_fields(
100100
The pre-processing steps are: 1) Convert to mm/h,
101101
2) Mask invalid values, 3) Log-transform the data [dBR].
102102
103-
metadata: bool, optional
104-
If True, also return file metadata.
105-
106103
upscale: float or None, optional
107104
Upscale fields in space during the pre-processing steps.
108105
If it is None, the precipitation field is not modified.
@@ -127,8 +124,8 @@ def get_precipitation_fields(
127124
128125
Returns
129126
-------
130-
reference_field : array
131-
metadata : dict
127+
dataset: xarray.Dataset
128+
As described in the documentation of :py:mod:`pysteps.io.importers`.
132129
"""
133130

134131
if source == "bom":
@@ -186,41 +183,34 @@ def get_precipitation_fields(
186183
# Read the radar composites
187184
importer = io.get_method(importer_name, "importer")
188185

189-
ref_dataset = io.read_timeseries(fns, importer, **_importer_kwargs)
186+
dataset = io.read_timeseries(fns, importer, **_importer_kwargs)
190187

191188
if not return_raw:
192-
if (num_prev_files == 0) and (num_next_files == 0):
193-
# Remove time dimension
194-
reference_field = np.squeeze(reference_field)
189+
precip_var = dataset.attrs["precip_var"]
195190

196191
# Convert to mm/h
197-
ref_dataset = stp.utils.to_rainrate(ref_dataset)
192+
dataset = stp.utils.to_rainrate(dataset)
193+
precip_var = dataset.attrs["precip_var"]
198194

199195
# Clip domain
200-
ref_dataset = stp.utils.clip_domain(ref_dataset, clip)
196+
dataset = clip_domain(dataset, clip)
201197

202198
# Upscale data
203-
reference_field, ref_metadata = aggregate_fields_space(
204-
reference_field, ref_metadata, upscale
205-
)
199+
dataset = aggregate_fields_space(dataset, upscale)
206200

207201
# Mask invalid values
208-
reference_field = np.ma.masked_invalid(reference_field)
202+
valid_mask = np.isfinite(dataset[precip_var].values)
209203

210204
if log_transform:
211205
# Log-transform the data [dBR]
212-
reference_field, ref_metadata = stp.utils.dB_transform(
213-
reference_field, ref_metadata, threshold=0.1, zerovalue=-15.0
214-
)
206+
dataset = stp.utils.dB_transform(dataset, threshold=0.1, zerovalue=-15.0)
215207

216208
# Set missing values with the fill value
217-
np.ma.set_fill_value(reference_field, ref_metadata["zerovalue"])
218-
reference_field.data[reference_field.mask] = ref_metadata["zerovalue"]
219-
220-
if metadata:
221-
return reference_field, ref_metadata
209+
metadata = dataset[precip_var].attrs
210+
zerovalue = metadata["zerovalue"]
211+
dataset[precip_var].data[~valid_mask] = zerovalue
222212

223-
return reference_field
213+
return dataset
224214

225215

226216
def smart_assert(actual_value, expected, tolerance=None):

pysteps/tests/test_nowcasts_steps.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from pysteps import io, motion, nowcasts, verification
88
from pysteps.tests.helpers import get_precipitation_fields
99

10-
1110
steps_arg_names = (
1211
"n_ens_members",
1312
"n_cascade_levels",
@@ -44,28 +43,29 @@ def test_steps_skill(
4443
):
4544
"""Tests STEPS nowcast skill."""
4645
# inputs
47-
precip_input, metadata = get_precipitation_fields(
46+
dataset_input = get_precipitation_fields(
4847
num_prev_files=2,
4948
num_next_files=0,
5049
return_raw=False,
5150
metadata=True,
5251
upscale=2000,
5352
)
54-
precip_input = precip_input.filled()
5553

56-
precip_obs = get_precipitation_fields(
54+
dataset_obs = get_precipitation_fields(
5755
num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000
58-
)[1:, :, :]
59-
precip_obs = precip_obs.filled()
56+
).isel(time=slice(1, None, None))
57+
precip_var = dataset_input.attrs["precip_var"]
58+
metadata = dataset_input[precip_var].attrs
59+
precip_data = dataset_input[precip_var].values
6060

6161
pytest.importorskip("cv2")
6262
oflow_method = motion.get_method("LK")
63-
retrieved_motion = oflow_method(precip_input)
63+
retrieved_motion = oflow_method(precip_data)
6464

6565
nowcast_method = nowcasts.get_method("steps")
6666

6767
precip_forecast = nowcast_method(
68-
precip_input,
68+
precip_data,
6969
retrieved_motion,
7070
timesteps=timesteps,
7171
precip_thr=metadata["threshold"],
@@ -86,7 +86,9 @@ def test_steps_skill(
8686
timesteps if isinstance(timesteps, int) else len(timesteps)
8787
)
8888

89-
crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1])
89+
crps = verification.probscores.CRPS(
90+
precip_forecast[:, -1], dataset_obs[precip_var].values[-1]
91+
)
9092
assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}"
9193

9294

0 commit comments

Comments
 (0)