Skip to content

Commit cf0d0ea

Browse files
authored
Default to phony_dims="access" in h5netcdf-backend (#10058)
* Default to phony_dims="access" in open_datatree for h5netcdf-backend. Warn user about behaviour change. * relocate * duplicate as needed in both places * ignore warning * conditionally warn users if phony_dims are found * add test for warning * add whats-new.rst entry * remove unneeded assignment to fix typing * Update doc/whats-new.rst * use phony_dims="access" per default also in open_dataset for h5netcdf backend * fix test * fix whats-new.rst
1 parent 0caf096 commit cf0d0ea

File tree

4 files changed

+88
-2
lines changed

4 files changed

+88
-2
lines changed

doc/whats-new.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,13 @@ New Features
3333

3434
Breaking changes
3535
~~~~~~~~~~~~~~~~
36-
36+
- Warn instead of raising if phony_dims are detected when using h5netcdf-backend and ``phony_dims=None`` (:issue:`10049`, :pull:`10058`)
37+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
3738

3839
Deprecations
3940
~~~~~~~~~~~~
40-
41+
- Move from ``phony_dims=None`` to ``phony_dims="access"`` for h5netcdf-backend (:issue:`10049`, :pull:`10058`)
42+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
4143

4244
Bug fixes
4345
~~~~~~~~~

xarray/backends/h5netcdf_.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,23 @@ def close(self, **kwargs):
372372
self._manager.close(**kwargs)
373373

374374

375+
def _check_phony_dims(phony_dims):
376+
emit_phony_dims_warning = False
377+
if phony_dims is None:
378+
emit_phony_dims_warning = True
379+
phony_dims = "access"
380+
return emit_phony_dims_warning, phony_dims
381+
382+
383+
def _emit_phony_dims_warning():
    """Emit a ``UserWarning`` announcing the new ``phony_dims`` default.

    The message states that ``phony_dims`` now defaults to ``'access'``,
    that ``phony_dims=None`` previously raised an error, and points to
    ``phony_dims='sort'`` for full netcdf equivalence.
    """
    emit_user_level_warning(
        "The 'phony_dims' kwarg now defaults to 'access'. "
        "Previously 'phony_dims=None' would raise an error. "
        "For full netcdf equivalence please use phony_dims='sort'.",
        UserWarning,
    )
390+
391+
375392
class H5netcdfBackendEntrypoint(BackendEntrypoint):
376393
"""
377394
Backend for netCDF files based on the h5netcdf package.
@@ -434,6 +451,10 @@ def open_dataset(
434451
driver_kwds=None,
435452
storage_options: dict[str, Any] | None = None,
436453
) -> Dataset:
454+
# Keep this message for some versions
455+
# remove and set phony_dims="access" above
456+
emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims)
457+
437458
filename_or_obj = _normalize_path(filename_or_obj)
438459
store = H5NetCDFStore.open(
439460
filename_or_obj,
@@ -460,6 +481,13 @@ def open_dataset(
460481
use_cftime=use_cftime,
461482
decode_timedelta=decode_timedelta,
462483
)
484+
485+
# only warn if phony_dims exist in file
486+
# remove together with the above check
487+
# after some versions
488+
if store.ds._root._phony_dim_count > 0 and emit_phony_dims_warning:
489+
_emit_phony_dims_warning()
490+
463491
return ds
464492

465493
def open_datatree(
@@ -530,6 +558,10 @@ def open_groups_as_dict(
530558
from xarray.core.treenode import NodePath
531559
from xarray.core.utils import close_on_error
532560

561+
# Keep this message for some versions
562+
# remove and set phony_dims="access" above
563+
emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims)
564+
533565
filename_or_obj = _normalize_path(filename_or_obj)
534566
store = H5NetCDFStore.open(
535567
filename_or_obj,
@@ -542,6 +574,7 @@ def open_groups_as_dict(
542574
driver=driver,
543575
driver_kwds=driver_kwds,
544576
)
577+
545578
# Check for a group and make it a parent if it exists
546579
if group:
547580
parent = NodePath("/") / NodePath(group)
@@ -571,6 +604,12 @@ def open_groups_as_dict(
571604
group_name = str(NodePath(path_group))
572605
groups_dict[group_name] = group_ds
573606

607+
# only warn if phony_dims exist in file
608+
# remove together with the above check
609+
# after some versions
610+
if store.ds._phony_dim_count > 0 and emit_phony_dims_warning:
611+
_emit_phony_dims_warning()
612+
574613
return groups_dict
575614

576615

xarray/tests/test_backends.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4170,6 +4170,28 @@ def test_roundtrip_complex(self):
41704170
with self.roundtrip(expected) as actual:
41714171
assert_equal(expected, actual)
41724172

4173+
def test_phony_dims_warning(self) -> None:
    """Check that ``open_dataset`` warns when ``phony_dims`` is left unset.

    A file is written directly with h5py, opened with the h5netcdf engine
    without passing ``phony_dims``, and the test asserts both the
    ``UserWarning`` and the resulting ``phony_dim_N`` dimension sizes.
    """
    import h5py

    foo_data = np.arange(125).reshape(5, 5, 5)
    bar_data = np.arange(625).reshape(25, 5, 5)
    var = {"foo1": foo_data, "foo2": bar_data, "foo3": foo_data, "foo4": bar_data}
    with create_tmp_file() as tmp_file:
        # Two groups, each holding the same four plain h5py datasets.
        with h5py.File(tmp_file, "w") as f:
            for grp in ("bar", "baz"):
                fx = f.create_group(grp)
                for k, v in var.items():
                    fx.create_dataset(k, data=v)
        with pytest.warns(UserWarning, match="The 'phony_dims' kwarg"):
            with xr.open_dataset(tmp_file, engine="h5netcdf", group="bar") as ds:
                # ``sizes`` is the stable name -> length mapping; ``dims`` is
                # documented to stop being a mapping in a future release.
                assert ds.sizes == {
                    "phony_dim_0": 5,
                    "phony_dim_1": 5,
                    "phony_dim_2": 5,
                    "phony_dim_3": 25,
                }
4194+
41734195

41744196
@requires_h5netcdf
41754197
@requires_netCDF4

xarray/tests/test_backends_datatree.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,29 @@ def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None:
372372
class TestH5NetCDFDatatreeIO(DatatreeIOBase):
373373
engine: T_DataTreeNetcdfEngine | None = "h5netcdf"
374374

375+
def test_phony_dims_warning(self, tmpdir) -> None:
    """Check that ``open_datatree`` warns when ``phony_dims`` is left unset.

    A file is written directly with h5py, opened as a tree with
    ``self.engine`` without passing ``phony_dims``, and the test asserts
    both the ``UserWarning`` and the ``phony_dim_N`` sizes of group ``bar``.
    """
    import h5py

    # Join with "/" rather than "+", which would append to the basename.
    filepath = tmpdir / "phony_dims.nc"

    foo_data = np.arange(125).reshape(5, 5, 5)
    bar_data = np.arange(625).reshape(25, 5, 5)
    var = {"foo1": foo_data, "foo2": bar_data, "foo3": foo_data, "foo4": bar_data}
    # Two groups, each holding the same four plain h5py datasets.
    with h5py.File(filepath, "w") as f:
        for grp in ("bar", "baz"):
            fx = f.create_group(grp)
            for k, v in var.items():
                fx.create_dataset(k, data=v)

    with pytest.warns(UserWarning, match="The 'phony_dims' kwarg"):
        with open_datatree(filepath, engine=self.engine) as tree:
            # ``sizes`` is the explicit name -> length mapping.
            assert tree.bar.sizes == {
                "phony_dim_0": 5,
                "phony_dim_1": 5,
                "phony_dim_2": 5,
                "phony_dim_3": 25,
            }
397+
375398

376399
@pytest.mark.skipif(
377400
have_zarr_v3, reason="datatree support for zarr 3 is not implemented yet"

0 commit comments

Comments
 (0)