Skip to content

Commit e674286

Browse files
kmuehlbauer, pre-commit-ci[bot], dcherian
authored
fix cf decoding of grid_mapping (#9765)
* fix cf decoding of grid_mapping
* fix linter
* unnest list, add tests
* add whats-new.rst entry
* check for second warning, copy to prevent windows error (?)
* revert copy, but set allow_cleanup_failures=ON_WINDOWS
* add itertools
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* Update xarray/conventions.py

  Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
* Update conventions.py
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* add test in test_conventions.py
* add comment
* revert backend tests

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent 0f8ff5c commit e674286

File tree

3 files changed

+130
-13
lines changed

3 files changed

+130
-13
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ Bug fixes
5858
By `Stephan Hoyer <https://github.com/shoyer>`_.
5959
- Fix regression in the interoperability of :py:meth:`DataArray.polyfit` and :py:meth:`xr.polyval` for date-time coordinates. (:pull:`9691`).
6060
By `Pascal Bourgault <https://github.com/aulemahal>`_.
61+
- Fix CF decoding of ``grid_mapping`` to allow all possible formats, add tests (:issue:`9761`, :pull:`9765`).
62+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
6163

6264
Documentation
6365
~~~~~~~~~~~~~

xarray/conventions.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import itertools
34
from collections import defaultdict
45
from collections.abc import Hashable, Iterable, Mapping, MutableMapping
56
from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union
@@ -31,6 +32,7 @@
3132
"formula_terms",
3233
)
3334
CF_RELATED_DATA_NEEDS_PARSING = (
35+
"grid_mapping",
3436
"cell_measures",
3537
"formula_terms",
3638
)
@@ -476,18 +478,41 @@ def stackable(dim: Hashable) -> bool:
476478
if decode_coords == "all":
477479
for attr_name in CF_RELATED_DATA:
478480
if attr_name in var_attrs:
479-
attr_val = var_attrs[attr_name]
480-
if attr_name not in CF_RELATED_DATA_NEEDS_PARSING:
481-
var_names = attr_val.split()
482-
else:
483-
roles_and_names = [
484-
role_or_name
485-
for part in attr_val.split(":")
486-
for role_or_name in part.split()
487-
]
488-
if len(roles_and_names) % 2 == 1:
489-
emit_user_level_warning(f"Attribute {attr_name} malformed")
490-
var_names = roles_and_names[1::2]
481+
# fixes stray colon
482+
attr_val = var_attrs[attr_name].replace(" :", ":")
483+
var_names = attr_val.split()
484+
# if grid_mapping is a single string, do not enter here
485+
if (
486+
attr_name in CF_RELATED_DATA_NEEDS_PARSING
487+
and len(var_names) > 1
488+
):
489+
# map the keys to list of strings
490+
# "A: b c d E: f g" returns
491+
# {"A": ["b", "c", "d"], "E": ["f", "g"]}
492+
roles_and_names = defaultdict(list)
493+
key = None
494+
for vname in var_names:
495+
if ":" in vname:
496+
key = vname.strip(":")
497+
else:
498+
if key is None:
499+
raise ValueError(
500+
f"First element {vname!r} of [{attr_val!r}] misses ':', "
501+
f"cannot decode {attr_name!r}."
502+
)
503+
roles_and_names[key].append(vname)
504+
# for grid_mapping keys are var_names
505+
if attr_name == "grid_mapping":
506+
var_names = list(roles_and_names.keys())
507+
else:
508+
# for cell_measures and formula_terms values are var names
509+
var_names = list(itertools.chain(*roles_and_names.values()))
510+
# consistency check (one element per key)
511+
if len(var_names) != len(roles_and_names.keys()):
512+
emit_user_level_warning(
513+
f"Attribute {attr_name!r} has malformed content [{attr_val!r}], "
514+
f"decoding {var_names!r} to coordinates."
515+
)
491516
if all(var_name in variables for var_name in var_names):
492517
new_vars[k].encoding[attr_name] = attr_val
493518
coord_names.update(var_names)
@@ -732,7 +757,7 @@ def _encode_coordinates(
732757
# the dataset faithfully. Because this serialization goes beyond CF
733758
# conventions, only do it if necessary.
734759
# Reference discussion:
735-
# http://mailman.cgd.ucar.edu/pipermail/cf-metadata/2014/007571.html
760+
# https://cfconventions.org/mailing-list-archive/Data/7400.html
736761
global_coordinates.difference_update(written_coords)
737762
if global_coordinates:
738763
attributes = dict(attributes)

xarray/tests/test_conventions.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,96 @@ def test_decode_coordinates(self) -> None:
294294
actual = conventions.decode_cf(original)
295295
assert actual.foo.encoding["coordinates"] == "x"
296296

297+
def test_decode_coordinates_with_key_values(self) -> None:
298+
# regression test for GH9761
299+
original = Dataset(
300+
{
301+
"temp": (
302+
("y", "x"),
303+
np.random.rand(2, 2),
304+
{
305+
"long_name": "temperature",
306+
"units": "K",
307+
"coordinates": "lat lon",
308+
"grid_mapping": "crs",
309+
},
310+
),
311+
"x": (
312+
("x"),
313+
np.arange(2),
314+
{"standard_name": "projection_x_coordinate", "units": "m"},
315+
),
316+
"y": (
317+
("y"),
318+
np.arange(2),
319+
{"standard_name": "projection_y_coordinate", "units": "m"},
320+
),
321+
"lat": (
322+
("y", "x"),
323+
np.random.rand(2, 2),
324+
{"standard_name": "latitude", "units": "degrees_north"},
325+
),
326+
"lon": (
327+
("y", "x"),
328+
np.random.rand(2, 2),
329+
{"standard_name": "longitude", "units": "degrees_east"},
330+
),
331+
"crs": (
332+
(),
333+
None,
334+
{
335+
"grid_mapping_name": "transverse_mercator",
336+
"longitude_of_central_meridian": -2.0,
337+
},
338+
),
339+
"crs2": (
340+
(),
341+
None,
342+
{
343+
"grid_mapping_name": "longitude_latitude",
344+
"longitude_of_central_meridian": -2.0,
345+
},
346+
),
347+
},
348+
)
349+
350+
original.temp.attrs["grid_mapping"] = "crs: x y"
351+
vars, attrs, coords = conventions.decode_cf_variables(
352+
original.variables, {}, decode_coords="all"
353+
)
354+
assert coords == {"lat", "lon", "crs"}
355+
356+
original.temp.attrs["grid_mapping"] = "crs: x y crs2: lat lon"
357+
vars, attrs, coords = conventions.decode_cf_variables(
358+
original.variables, {}, decode_coords="all"
359+
)
360+
assert coords == {"lat", "lon", "crs", "crs2"}
361+
362+
# stray colon
363+
original.temp.attrs["grid_mapping"] = "crs: x y crs2 : lat lon"
364+
vars, attrs, coords = conventions.decode_cf_variables(
365+
original.variables, {}, decode_coords="all"
366+
)
367+
assert coords == {"lat", "lon", "crs", "crs2"}
368+
369+
original.temp.attrs["grid_mapping"] = "crs x y crs2: lat lon"
370+
with pytest.raises(ValueError, match="misses ':'"):
371+
conventions.decode_cf_variables(original.variables, {}, decode_coords="all")
372+
373+
del original.temp.attrs["grid_mapping"]
374+
original.temp.attrs["formula_terms"] = "A: lat D: lon E: crs2"
375+
vars, attrs, coords = conventions.decode_cf_variables(
376+
original.variables, {}, decode_coords="all"
377+
)
378+
assert coords == {"lat", "lon", "crs2"}
379+
380+
original.temp.attrs["formula_terms"] = "A: lat lon D: crs E: crs2"
381+
with pytest.warns(UserWarning, match="has malformed content"):
382+
vars, attrs, coords = conventions.decode_cf_variables(
383+
original.variables, {}, decode_coords="all"
384+
)
385+
assert coords == {"lat", "lon", "crs", "crs2"}
386+
297387
def test_0d_int32_encoding(self) -> None:
298388
original = Variable((), np.int32(0), encoding={"dtype": "int64"})
299389
expected = Variable((), np.int64(0))

0 commit comments

Comments
 (0)