From 3ec575dd4372fdaa4d81b1334a51340d13adc4e3 Mon Sep 17 00:00:00 2001 From: pratiman-91 Date: Thu, 16 Jan 2025 23:06:43 +0800 Subject: [PATCH 01/43] GH6736 --- xarray/backends/api.py | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 3211b9efbae..146bdf5fc7a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import warnings from collections.abc import ( Callable, Hashable, @@ -1393,6 +1394,7 @@ def open_mfdataset( join: JoinOptions = "outer", attrs_file: str | os.PathLike | None = None, combine_attrs: CombineAttrsOptions = "override", + errors: str = "raise", **kwargs, ) -> Dataset: """Open multiple files as a single dataset. @@ -1519,7 +1521,11 @@ def open_mfdataset( If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. - **kwargs : optional + errors : {'ignore', 'raise', 'warn'}, default 'raise' + - If 'raise', then invalid dataset will raise an exception. + - If 'ignore', then invalid dataset will be ignored. + - If 'warn', then a warning will be issued for each invalid dataset. + **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. For an overview of some of the possible options, see the documentation of :py:func:`xarray.open_dataset` @@ -1611,7 +1617,36 @@ def open_mfdataset( open_ = open_dataset getattr_ = getattr - datasets = [open_(p, **open_kwargs) for p in paths1d] + try: + if errors == "raise": + datasets = [open_(p, **open_kwargs) for p in paths1d] + elif errors == "ignore": + datasets = [] + for p in paths1d: + try: + ds = open_(p, **open_kwargs) + datasets.append(ds) + except Exception: + continue + elif errors == "warn": + datasets = [] + for p in paths1d: + try: + ds = open_(p, **open_kwargs) + datasets.append(ds) + except Exception: + warnings.warn( + f"Could not open {p}. Ignoring.", UserWarning, stacklevel=2 + ) + continue + else: + raise ValueError( + f"{errors} is an invalid option for the keyword argument ``errors``" + ) + except ValueError: + for ds in datasets: + ds.close() + closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: datasets = [preprocess(ds) for ds in datasets] From 5b95c213af0b17a8c67d79be7c1726eec577ee8c Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Thu, 16 Jan 2025 23:27:56 +0800 Subject: [PATCH 02/43] Updated whats-new.rst --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 95aa5a57438..47d2cce24c7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,6 +52,8 @@ New Features ~~~~~~~~~~~~ - Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). By `Kai Mühlbauer `_ and `Spencer Clark `_. +- Add new ``errors`` arg to :py:meth:`open_mfdataset` to better handle invalid files. + (:issue:`6736`). By `Pratiman Patel `_. 
Breaking changes ~~~~~~~~~~~~~~~~ From 9249bf353431cdf81f0c51038cd68124897cdb11 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 17 Jan 2025 10:53:29 +0800 Subject: [PATCH 03/43] Update xarray/backends/api.py Co-authored-by: Michael Niklas --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 146bdf5fc7a..78e0e78e705 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1394,7 +1394,7 @@ def open_mfdataset( join: JoinOptions = "outer", attrs_file: str | os.PathLike | None = None, combine_attrs: CombineAttrsOptions = "override", - errors: str = "raise", + errors: ErrorOptionsWithWarn = "raise", **kwargs, ) -> Dataset: """Open multiple files as a single dataset. From 1eb6422c592abb272acc9af6fe8167a96ffe41da Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 17 Jan 2025 13:37:37 +0800 Subject: [PATCH 04/43] Updated logic --- xarray/backends/api.py | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 78e0e78e705..13b2e091fb3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1617,35 +1617,26 @@ def open_mfdataset( open_ = open_dataset getattr_ = getattr - try: - if errors == "raise": - datasets = [open_(p, **open_kwargs) for p in paths1d] - elif errors == "ignore": - datasets = [] - for p in paths1d: - try: - ds = open_(p, **open_kwargs) - datasets.append(ds) - except Exception: - continue - elif errors == "warn": - datasets = [] - for p in paths1d: - try: - ds = open_(p, **open_kwargs) - datasets.append(ds) - except Exception: + if errors in ("raise", "warn", "ignore"): + datasets = [] + for p in paths1d: + try: + ds = open_(p, **open_kwargs) + datasets.append(ds) + except Exception: + if errors == "raise": + raise + elif errors == "ignore": warnings.warn( f"Could not open {p}. Ignoring.", UserWarning, stacklevel=2 ) continue - else: - raise ValueError( - f"{errors} is an invalid option for the keyword argument ``errors``" - ) - except ValueError: - for ds in datasets: - ds.close() + else: + continue + else: + raise ValueError( + f"{errors} is an invalid option for the keyword argument ``errors``" + ) closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: From 8005e3360e4e9f1fdafff472465a22d3165c7205 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Sun, 19 Jan 2025 18:09:40 +0800 Subject: [PATCH 05/43] Added tests and modifiede the logic to get correct ids for concat --- doc/whats-new.rst | 2 +- xarray/backends/api.py | 47 +++++++++++++++++++---------------- xarray/tests/test_backends.py | 31 +++++++++++++++++++++++ 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 47d2cce24c7..043d47ffca5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -53,7 +53,7 @@ New Features - Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). By `Kai Mühlbauer `_ and `Spencer Clark `_. - Add new ``errors`` arg to :py:meth:`open_mfdataset` to better handle invalid files. - (:issue:`6736`). By `Pratiman Patel `_. + (:issue:`6736`, :pull:`9955`). By `Pratiman Patel `_. 
Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 13b2e091fb3..61288a2f387 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1521,10 +1521,11 @@ def open_mfdataset( If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. - errors : {'ignore', 'raise', 'warn'}, default 'raise' + errors : {'raise', 'warn', 'ignore'}, default 'raise' - If 'raise', then invalid dataset will raise an exception. - - If 'ignore', then invalid dataset will be ignored. - If 'warn', then a warning will be issued for each invalid dataset. + - If 'ignore', then invalid dataset will be ignored. + **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. For an overview of some of the possible options, see the documentation of @@ -1617,26 +1618,27 @@ def open_mfdataset( open_ = open_dataset getattr_ = getattr - if errors in ("raise", "warn", "ignore"): - datasets = [] - for p in paths1d: - try: - ds = open_(p, **open_kwargs) - datasets.append(ds) - except Exception: - if errors == "raise": - raise - elif errors == "ignore": - warnings.warn( - f"Could not open {p}. Ignoring.", UserWarning, stacklevel=2 - ) - continue - else: - continue - else: - raise ValueError( - f"{errors} is an invalid option for the keyword argument ``errors``" - ) + if errors not in ("raise", "warn", "ignore"): + raise ValueError(f"'errors' must be 'raise', 'warn' or 'ignore', got '{errors}'") + + datasets = [] + invalid_ids = set() # to remove invalid ids for 'combine' + for i, p in enumerate(paths1d): + try: + ds = open_(p, **open_kwargs) + datasets.append(ds) + except Exception: + if errors == "raise": + raise + elif errors == "warn": + warnings.warn( + f"Could not open {p}. 
Ignoring.", UserWarning, stacklevel=2 + ) + invalid_ids.add(i) + continue + else: + invalid_ids.add(i) + continue closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: @@ -1652,6 +1654,7 @@ def open_mfdataset( if combine == "nested": # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" + ids = [id_ for i, id_ in enumerate(ids) if i not in invalid_ids] combined = _nested_combine( datasets, concat_dims=concat_dim, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 72078da11b9..d71bebc1ef9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4831,6 +4831,37 @@ def test_open_mfdataset_2(self) -> None: ) as actual: assert_identical(original, actual) + def test_open_mfdataset_with_ignore(self) -> None: + original = Dataset({"foo": ("x", np.random.randn(10))}) + with create_tmp_files(2) as (tmp1, tmp2): + ds1 = original.isel(x=slice(5)) + ds2 = original.isel(x=slice(5, 10)) + ds1.to_netcdf(tmp1) + ds2.to_netcdf(tmp2) + with open_mfdataset( + [tmp1, tmp2, "non-existent-file.nc"], + concat_dim="x", + combine="nested", + errors="ignore", + ) as actual: + assert_identical(original, actual) + + def test_open_mfdataset_with_warn(self) -> None: + original = Dataset({"foo": ("x", np.random.randn(10))}) + with pytest.warns(UserWarning, match="Ignoring."): + with create_tmp_files(2) as (tmp1, tmp2): + ds1 = original.isel(x=slice(5)) + ds2 = original.isel(x=slice(5, 10)) + ds1.to_netcdf(tmp1) + ds2.to_netcdf(tmp2) + with open_mfdataset( + [tmp1, tmp2, "non-existent-file.nc"], + concat_dim="x", + combine="nested", + errors="warn", + ) as actual: + assert_identical(original, actual) + def test_attrs_mfdataset(self) -> None: original = Dataset({"foo": ("x", np.random.randn(10))}) with create_tmp_file() as tmp1: From 3bfaaee6e61b782b621d8d5de584c5545f55993c Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Sun, 19 Jan 2025 22:42:51 +0800 Subject: [PATCH 06/43] Added new tests and logic to handle 2x2 open_mfdataset with ignore and warn. --- xarray/backends/api.py | 8 ++++---- xarray/tests/test_backends.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 61288a2f387..0f4f5fc0ca1 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1622,22 +1622,23 @@ def open_mfdataset( raise ValueError(f"'errors' must be 'raise', 'warn' or 'ignore', got '{errors}'") datasets = [] - invalid_ids = set() # to remove invalid ids for 'combine' for i, p in enumerate(paths1d): try: ds = open_(p, **open_kwargs) datasets.append(ds) except Exception: + # remove invalid ids and paths + if combine == "nested": + ids.pop(i) + paths1d.pop(i) if errors == "raise": raise elif errors == "warn": warnings.warn( f"Could not open {p}. 
Ignoring.", UserWarning, stacklevel=2 ) - invalid_ids.add(i) continue else: - invalid_ids.add(i) continue closers = [getattr_(ds, "_close") for ds in datasets] @@ -1654,7 +1655,6 @@ def open_mfdataset( if combine == "nested": # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" - ids = [id_ for i, id_ in enumerate(ids) if i not in invalid_ids] combined = _nested_combine( datasets, concat_dims=concat_dim, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d71bebc1ef9..0f171ba7975 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4862,6 +4862,37 @@ def test_open_mfdataset_with_warn(self) -> None: ) as actual: assert_identical(original, actual) + def test_open_mfdataset_2d_with_ignore(self) -> None: + original = Dataset({"foo": (["x", "y"], np.random.randn(10, 8))}) + with create_tmp_files(4) as (tmp1, tmp2, tmp3, tmp4): + original.isel(x=slice(5), y=slice(4)).to_netcdf(tmp1) + original.isel(x=slice(5, 10), y=slice(4)).to_netcdf(tmp2) + original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) + original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) + with open_mfdataset( + [[tmp1, tmp2], [tmp3, tmp4, "non-existent-file.nc"]], + combine="nested", + concat_dim=["y", "x"], + errors="ignore", + ) as actual: + assert_identical(original, actual) + + def test_open_mfdataset_2d_with_warn(self) -> None: + original = Dataset({"foo": (["x", "y"], np.random.randn(10, 8))}) + with pytest.warns(UserWarning, match="Ignoring."): + with create_tmp_files(4) as (tmp1, tmp2, tmp3, tmp4): + original.isel(x=slice(5), y=slice(4)).to_netcdf(tmp1) + original.isel(x=slice(5, 10), y=slice(4)).to_netcdf(tmp2) + original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) + original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) + with open_mfdataset( + [[tmp1, tmp2], [tmp3, tmp4, "non-existent-file.nc"]], + combine="nested", + concat_dim=["y", "x"], + errors="warn", + ) as actual: + assert_identical(original, actual) + def test_attrs_mfdataset(self) -> None: original = Dataset({"foo": ("x", np.random.randn(10))}) with create_tmp_file() as tmp1: From f6210309ad97d13c0c47dc678061dab62ac8b244 Mon Sep 17 00:00:00 2001 From: pratiman-91 Date: Mon, 20 Jan 2025 07:40:31 +0800 Subject: [PATCH 07/43] pre-commit run --- xarray/backends/api.py | 13 +++++++------ xarray/tests/test_backends.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0f4f5fc0ca1..30a74e779ab 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -61,6 +61,7 @@ from xarray.core.types import ( CombineAttrsOptions, CompatOptions, + ErrorOptionsWithWarn, JoinOptions, NestedSequence, ReadBuffer, @@ -104,8 +105,7 @@ def _get_default_engine_remote_uri() -> Literal["netcdf4", "pydap"]: engine = "pydap" except ImportError as err: raise ValueError( - "netCDF4 or pydap is required for accessing " - "remote datasets via OPeNDAP" + "netCDF4 or pydap is required for accessing remote datasets via OPeNDAP" ) from err return engine @@ -1525,7 +1525,7 @@ def open_mfdataset( - If 'raise', then invalid dataset will raise an exception. - If 'warn', then a warning will be issued for each invalid dataset. - If 'ignore', then invalid dataset will be ignored. - + **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. 
For an overview of some of the possible options, see the documentation of @@ -1619,7 +1619,9 @@ def open_mfdataset( getattr_ = getattr if errors not in ("raise", "warn", "ignore"): - raise ValueError(f"'errors' must be 'raise', 'warn' or 'ignore', got '{errors}'") + raise ValueError( + f"'errors' must be 'raise', 'warn' or 'ignore', got '{errors}'" + ) datasets = [] for i, p in enumerate(paths1d): @@ -1678,8 +1680,7 @@ def open_mfdataset( ) else: raise ValueError( - f"{combine} is an invalid option for the keyword argument" - " ``combine``" + f"{combine} is an invalid option for the keyword argument ``combine``" ) except ValueError: for ds in datasets: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0f171ba7975..dce99ccc624 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4892,7 +4892,7 @@ def test_open_mfdataset_2d_with_warn(self) -> None: errors="warn", ) as actual: assert_identical(original, actual) - + def test_attrs_mfdataset(self) -> None: original = Dataset({"foo": ("x", np.random.randn(10))}) with create_tmp_file() as tmp1: From b9f04c8ef31a4a8584dbab165ebb82b34400eda8 Mon Sep 17 00:00:00 2001 From: pratiman-91 Date: Sun, 16 Feb 2025 12:49:47 +0800 Subject: [PATCH 08/43] new logic to add nested paths --- xarray/backends/api.py | 46 ++++++++++++++++++++++++++++++++--- xarray/tests/test_backends.py | 8 +++--- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 30a74e779ab..269def67d3f 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1369,6 +1369,38 @@ def open_groups( return groups +def remove_path(paths, path_to_remove): + """ + Recursively removes specific path from a nested or non-nested list. + + Parameters + ---------- + paths: list + The path list (nested or not) from which to remove paths. + path_to_remove: str or list + The path to be removed. + + Returns + ------- + list + A new list with specified paths removed. 
+ """ + # Initialize an empty list to store the result + result = [] + + for item in paths: + if isinstance(item, list): + # If the current item is a list, recursively call remove_elements on it + nested_result = remove_path(item, path_to_remove) + if nested_result: # Only add non-empty lists to avoid adding empty lists + result.append(nested_result) + elif item not in path_to_remove: + # Add the item to the result if it is not in the set of elements to remove + result.append(item) + + return result + + def open_mfdataset( paths: str | os.PathLike @@ -1624,15 +1656,16 @@ def open_mfdataset( ) datasets = [] - for i, p in enumerate(paths1d): + remove_paths = False + for p in paths1d: try: ds = open_(p, **open_kwargs) datasets.append(ds) except Exception: - # remove invalid ids and paths + # remove invalid paths if combine == "nested": - ids.pop(i) - paths1d.pop(i) + paths = remove_path(paths, p) + remove_paths = True if errors == "raise": raise elif errors == "warn": @@ -1655,6 +1688,11 @@ def open_mfdataset( # Combine all datasets, closing them in case of a ValueError try: if combine == "nested": + # Create new ids and paths based on removed items + if remove_paths: + combined_ids_paths = _infer_concat_order_from_positions(paths) + ids = list(combined_ids_paths.keys()) + # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = _nested_combine( diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index dce99ccc624..86e2b2a9cc0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4839,7 +4839,7 @@ def test_open_mfdataset_with_ignore(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2, "non-existent-file.nc"], + [tmp1, "non-existent-file.nc", tmp2], concat_dim="x", combine="nested", errors="ignore", @@ -4855,7 +4855,7 @@ def test_open_mfdataset_with_warn(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2, "non-existent-file.nc"], + [tmp1, "non-existent-file.nc", tmp2], concat_dim="x", combine="nested", errors="warn", @@ -4870,7 +4870,7 @@ def test_open_mfdataset_2d_with_ignore(self) -> None: original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4, "non-existent-file.nc"]], + [[tmp1, tmp2], ["non-existent-file.nc", tmp3, tmp4]], combine="nested", concat_dim=["y", "x"], errors="ignore", @@ -4886,7 +4886,7 @@ def test_open_mfdataset_2d_with_warn(self) -> None: original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4, "non-existent-file.nc"]], + [[tmp1, tmp2, "non-existent-file.nc"], [tmp3, tmp4]], combine="nested", concat_dim=["y", "x"], errors="warn", From 0657014b650b03e2134fcaa86a1d496426e8a11e Mon Sep 17 00:00:00 2001 From: pratiman-91 Date: Fri, 4 Apr 2025 10:25:43 +0800 Subject: [PATCH 09/43] made remove_path a private function and updated whats-new.rst --- doc/whats-new.rst | 5 +++-- xarray/backends/api.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 043d47ffca5..e3ef60d969b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,10 +50,11 @@ eventually be deprecated. New Features ~~~~~~~~~~~~ -- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). 
- By `Kai Mühlbauer `_ and `Spencer Clark `_. - Add new ``errors`` arg to :py:meth:`open_mfdataset` to better handle invalid files. (:issue:`6736`, :pull:`9955`). By `Pratiman Patel `_. +- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). + By `Kai Mühlbauer `_ and `Spencer Clark `_. + Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 269def67d3f..57cd2520505 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1369,7 +1369,7 @@ def open_groups( return groups -def remove_path(paths, path_to_remove): +def _remove_path(paths, path_to_remove) -> list: """ Recursively removes specific path from a nested or non-nested list. @@ -1391,7 +1391,7 @@ def remove_path(paths, path_to_remove): for item in paths: if isinstance(item, list): # If the current item is a list, recursively call remove_elements on it - nested_result = remove_path(item, path_to_remove) + nested_result = _remove_path(item, path_to_remove) if nested_result: # Only add non-empty lists to avoid adding empty lists result.append(nested_result) elif item not in path_to_remove: @@ -1664,7 +1664,7 @@ def open_mfdataset( except Exception: # remove invalid paths if combine == "nested": - paths = remove_path(paths, p) + paths = _remove_path(paths, p) remove_paths = True if errors == "raise": raise From ffc3c53e2767e8fccbedd4aae36a0aee50e05204 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Tue, 15 Apr 2025 09:59:14 +0800 Subject: [PATCH 10/43] Updated whats-new.rst --- doc/whats-new.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 62a7e7ff28f..f53c3a854aa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,7 +24,9 @@ New Features - Added `scipy-stubs `_ to the ``xarray[types]`` dependencies. By `Joren Hammudoglu `_. - +- Added ``errors`` arg to :py:meth:`open_mfdataset` to better handle invalid files. + (:issue:`6736`, :pull:`9955`). By `Pratiman Patel `_. + Breaking changes ~~~~~~~~~~~~~~~~ From efe16420c4175f089833860ccc22e9fa222b678b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Apr 2025 01:59:37 +0000 Subject: [PATCH 11/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f53c3a854aa..e63bfaf018f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,7 +26,7 @@ New Features By `Joren Hammudoglu `_. - Added ``errors`` arg to :py:meth:`open_mfdataset` to better handle invalid files. (:issue:`6736`, :pull:`9955`). By `Pratiman Patel `_. - + Breaking changes ~~~~~~~~~~~~~~~~ From fc286d575fc7cdec5f6efe0ac825db92425822a5 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 25 Apr 2025 10:45:33 +0800 Subject: [PATCH 12/43] removed entry to whats-new.rst --- doc/whats-new.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1d3d6d3bf5d..25f5d7a0aae 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,9 +24,7 @@ New Features - Added `scipy-stubs `_ to the ``xarray[types]`` dependencies. By `Joren Hammudoglu `_. -- Added ``errors`` arg to :py:meth:`open_mfdataset` to better handle invalid files. - (:issue:`6736`, :pull:`9955`). 
By `Pratiman Patel `_. - + Breaking changes ~~~~~~~~~~~~~~~~ From ae0aa488cf7f2e45b3c352abbdae56809c439e6f Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 25 Apr 2025 10:56:34 +0800 Subject: [PATCH 13/43] Remove conflict --- doc/whats-new.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 25f5d7a0aae..30cce50b20e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,9 +22,7 @@ v2025.04.0 (unreleased) New Features ~~~~~~~~~~~~ -- Added `scipy-stubs `_ to the ``xarray[types]`` dependencies. - By `Joren Hammudoglu `_. - + Breaking changes ~~~~~~~~~~~~~~~~ From c27ac70a2c1bd67e193fa01cab00b02c59aa6fbb Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 25 Apr 2025 10:59:46 +0800 Subject: [PATCH 14/43] Whats-new conflicts --- doc/whats-new.rst | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 30cce50b20e..cc9b1597248 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,10 +22,22 @@ v2025.04.0 (unreleased) New Features ~~~~~~~~~~~~ +- Added `scipy-stubs `_ to the ``xarray[types]`` dependencies. + By `Joren Hammudoglu `_. +- Improved compatibility with OPeNDAP DAP4 data model for backend engine ``pydap``. This + includes ``datatree`` support, and removing slashes from dimension names. By + `Miguel Jimenez-Urias `_. Breaking changes ~~~~~~~~~~~~~~~~ +- The minimum versions of some dependencies were changed + + ===================== ========= ======= + Package Old New + ===================== ========= ======= + pydap 3.4 3.5.0 + ===================== ========= ======= Deprecations ~~~~~~~~~~~~ @@ -45,6 +57,8 @@ Documentation - Fix references to core classes in docs (:issue:`10195`, :pull:`10207`). By `Mattia Almansi `_. +- Fix references to point to updated pydap documentation (:pull:`10182`). + By `Miguel Jimenez-Urias `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -60,9 +74,11 @@ Andrecho, Deepak Cherian, Ian Hunt-Isaak, Karl Krauth, Mathias Hauser, Maximilia New Features ~~~~~~~~~~~~ - - Allow setting a ``fill_value`` for Zarr format 3 arrays. Specify ``fill_value`` in ``encoding`` as usual. (:issue:`10064`). By `Deepak Cherian `_. +- Added :py:class:`indexes.RangeIndex` as an alternative, memory saving Xarray index representing + a 1-dimensional bounded interval with evenly spaced floating values (:issue:`8473`, :pull:`10076`). + By `Benoit Bovy `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -244,8 +260,26 @@ eventually be deprecated. New Features ~~~~~~~~~~~~ -- Add new ``errors`` arg to :py:meth:`open_mfdataset` to better handle invalid files. - (:issue:`6736`, :pull:`9955`). By `Pratiman Patel `_. +- Relax nanosecond resolution restriction in CF time coding and permit + :py:class:`numpy.datetime64` or :py:class:`numpy.timedelta64` dtype arrays + with ``"s"``, ``"ms"``, ``"us"``, or ``"ns"`` resolution throughout xarray + (:issue:`7493`, :pull:`9618`, :pull:`9977`, :pull:`9966`, :pull:`9999`). By + `Kai Mühlbauer `_ and `Spencer Clark + `_. +- Enable the ``compute=False`` option in :py:meth:`DataTree.to_zarr`. (:pull:`9958`). + By `Sam Levang `_. +- Improve the error message raised when no key is matching the available variables in a dataset. (:pull:`9943`) + By `Jimmy Westling `_. +- Added a ``time_unit`` argument to :py:meth:`CFTimeIndex.to_datetimeindex`. 
+ Note that in a future version of xarray, + :py:meth:`CFTimeIndex.to_datetimeindex` will return a microsecond-resolution + :py:class:`pandas.DatetimeIndex` instead of a nanosecond-resolution + :py:class:`pandas.DatetimeIndex` (:pull:`9965`). By `Spencer Clark + `_ and `Kai Mühlbauer + `_. +- Adds shards to the list of valid_encodings in the zarr backend, so that + sharded Zarr V3s can be written (:issue:`9947`, :pull:`9948`). + By `Jacob Prince_Bieker `_ Deprecations ~~~~~~~~~~~~ @@ -8614,4 +8648,4 @@ Miles. v0.1 (2 May 2014) ----------------- -Initial release. +Initial release. \ No newline at end of file From 4a00d26e44e749316bf3521cf9bb79fd3a28bcb9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 25 Apr 2025 03:01:45 +0000 Subject: [PATCH 15/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cc9b1597248..48cd69ad82d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -8648,4 +8648,4 @@ Miles. v0.1 (2 May 2014) ----------------- -Initial release. \ No newline at end of file +Initial release. From 860be1e359b6d1aa59bf5e273fb5b452ad8ceb18 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 15:10:08 +0800 Subject: [PATCH 16/43] modify docs --- xarray/backends/api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0b39b67a83a..80be329a036 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1575,11 +1575,11 @@ def open_mfdataset( If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. - errors : {'raise', 'warn', 'ignore'}, default 'raise' - - If 'raise', then invalid dataset will raise an exception. - - If 'warn', then a warning will be issued for each invalid dataset. - - If 'ignore', then invalid dataset will be ignored. + errors : {"raise", "warn", "ignore"}, default: "raise" + - "raise": invalid dataset will raise an exception. + - "warn": a warning will be issued for each invalid dataset. + - "ignore": then invalid dataset will be ignored. **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. For an overview of some of the possible options, see the documentation of From 0451d13c9b45f0006745f866784a3170d3f40b25 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 18:02:24 +0800 Subject: [PATCH 17/43] modify doc-strings --- xarray/backends/api.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 80be329a036..197ed0fd552 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1392,21 +1392,6 @@ def open_groups( def _remove_path(paths, path_to_remove) -> list: - """ - Recursively removes specific path from a nested or non-nested list. - - Parameters - ---------- - paths: list - The path list (nested or not) from which to remove paths. - path_to_remove: str or list - The path to be removed. - - Returns - ------- - list - A new list with specified paths removed. 
- """ # Initialize an empty list to store the result result = [] @@ -1576,11 +1561,12 @@ def open_mfdataset( If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. errors : {"raise", "warn", "ignore"}, default: "raise" + String indicating how to handle errors in opeining dataset. - "raise": invalid dataset will raise an exception. - "warn": a warning will be issued for each invalid dataset. - "ignore": then invalid dataset will be ignored. - **kwargs : optional + **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. For an overview of some of the possible options, see the documentation of :py:func:`xarray.open_dataset` From 05cb2f0cbd610adec8bb757ede0100ec4157b7f3 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 19:55:14 +0800 Subject: [PATCH 18/43] Update xarray/backends/api.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 197ed0fd552..babab5581d5 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1561,7 +1561,7 @@ def open_mfdataset( If a callable, it must expect a sequence of ``attrs`` dicts and a context object as its only parameters. errors : {"raise", "warn", "ignore"}, default: "raise" - String indicating how to handle errors in opeining dataset. + String indicating how to handle errors in opening dataset. - "raise": invalid dataset will raise an exception. - "warn": a warning will be issued for each invalid dataset. From 607b6f0697a6dd150165571f9fe8a03478e86d06 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 19:55:52 +0800 Subject: [PATCH 19/43] Update xarray/backends/api.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index babab5581d5..957a1420728 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1565,7 +1565,7 @@ def open_mfdataset( - "raise": invalid dataset will raise an exception. - "warn": a warning will be issued for each invalid dataset. - - "ignore": then invalid dataset will be ignored. + - "ignore": invalid dataset will be ignored. **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. For an overview of some of the possible options, see the documentation of From 0b67aa1f057cb6ba5279eeb73f09da8cbd84d217 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 20:11:58 +0800 Subject: [PATCH 20/43] catch exception for warn --- xarray/backends/api.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 957a1420728..0f0550655a7 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1669,20 +1669,18 @@ def open_mfdataset( try: ds = open_(p, **open_kwargs) datasets.append(ds) - except Exception: + except Exception as e: # remove invalid paths if combine == "nested": paths = _remove_path(paths, p) remove_paths = True if errors == "raise": raise - elif errors == "warn": + if errors == "warn": warnings.warn( - f"Could not open {p}. 
Ignoring.", UserWarning, stacklevel=2 + f"Could not open {p} due to {e}. Ignoring.", UserWarning, stacklevel=2 ) - continue - else: - continue + continue closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: From 3e269ea26bf3f150ac5b7a7462c68ce87160c237 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 May 2025 12:12:36 +0000 Subject: [PATCH 21/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0f0550655a7..8ee5d021f8e 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1678,7 +1678,9 @@ def open_mfdataset( raise if errors == "warn": warnings.warn( - f"Could not open {p} due to {e}. Ignoring.", UserWarning, stacklevel=2 + f"Could not open {p} due to {e}. Ignoring.", + UserWarning, + stacklevel=2, ) continue From 7c9867071d37f8afe91cde5615bfca1c5248dd91 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 20:57:31 +0800 Subject: [PATCH 22/43] Update xarray/backends/api.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/backends/api.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8ee5d021f8e..8dea06d577a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1670,19 +1670,14 @@ def open_mfdataset( ds = open_(p, **open_kwargs) datasets.append(ds) except Exception as e: + if errors == "raise": + raise + elif errors == "warn": + emit_user_level_warnings(f"Could not open {p} due to {e}. Ignoring.") # remove invalid paths if combine == "nested": paths = _remove_path(paths, p) remove_paths = True - if errors == "raise": - raise - if errors == "warn": - warnings.warn( - f"Could not open {p} due to {e}. 
Ignoring.", - UserWarning, - stacklevel=2, - ) - continue closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: From 2567598a69626156c25c9c86ab3adc0851d0e6d0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 May 2025 12:58:21 +0000 Subject: [PATCH 23/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8dea06d577a..f39ddddff9c 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1,7 +1,6 @@ from __future__ import annotations import os -import warnings from collections.abc import ( Callable, Hashable, From e0fa3baf3852ddb60000ce470beba67fdd4a3bec Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 21:05:05 +0800 Subject: [PATCH 24/43] import emit_user_level_warning --- xarray/backends/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f39ddddff9c..c0e8351080d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -50,6 +50,7 @@ _nested_combine, combine_by_coords, ) +from xarray.core.utils import emit_user_level_warning if TYPE_CHECKING: try: From c24826c6b580915dbcc039fee06ba521f0c2e44c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 May 2025 13:06:43 +0000 Subject: [PATCH 25/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c0e8351080d..f39ddddff9c 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -50,7 +50,6 @@ _nested_combine, combine_by_coords, ) -from xarray.core.utils import emit_user_level_warning if TYPE_CHECKING: try: From f55644b1771abd1dce18b983e20f98fd6e6d216f Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 21:13:02 +0800 Subject: [PATCH 26/43] retry importing emit_user_level_warning --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f39ddddff9c..027c7eb55b8 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -41,7 +41,7 @@ from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees from xarray.core.types import NetcdfWriteModes, ZarrWriteModes -from xarray.core.utils import is_remote_uri +from xarray.core.utils import is_remote_uri, emit_user_level_warning from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.structure.chunks import _get_chunk, _maybe_chunk From a726c4a769ce28957af9d66c8632d6fd4ecce15a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 May 2025 13:15:49 +0000 Subject: [PATCH 27/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 027c7eb55b8..f39ddddff9c 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -41,7 +41,7 @@ 
from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees from xarray.core.types import NetcdfWriteModes, ZarrWriteModes -from xarray.core.utils import is_remote_uri, emit_user_level_warning +from xarray.core.utils import is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.structure.chunks import _get_chunk, _maybe_chunk From 16819bd6980db0a07e480beab458084bd2f37679 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 21:18:00 +0800 Subject: [PATCH 28/43] emit_user_level_warning --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f39ddddff9c..53ddd1cbcf2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1672,7 +1672,7 @@ def open_mfdataset( if errors == "raise": raise elif errors == "warn": - emit_user_level_warnings(f"Could not open {p} due to {e}. Ignoring.") + emit_user_level_warning(f"Could not open {p} due to {e}. Ignoring.") # remove invalid paths if combine == "nested": paths = _remove_path(paths, p) From 6105c0dcf396dda0b03682ec41c2aa52bdec7263 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Fri, 30 May 2025 21:18:28 +0800 Subject: [PATCH 29/43] adding import --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 53ddd1cbcf2..7ab6371a556 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -41,7 +41,7 @@ from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees from xarray.core.types import NetcdfWriteModes, ZarrWriteModes -from xarray.core.utils import is_remote_uri +from xarray.core.utils import is_remote_uri, emit_user_level_warning from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.structure.chunks import _get_chunk, _maybe_chunk From 75468a15c7126944f535a601930f13002af27550 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 30 May 2025 13:18:50 +0000 Subject: [PATCH 30/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 7ab6371a556..0b7c9c22454 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -41,7 +41,7 @@ from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees from xarray.core.types import NetcdfWriteModes, ZarrWriteModes -from xarray.core.utils import is_remote_uri, emit_user_level_warning +from xarray.core.utils import emit_user_level_warning, is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.structure.chunks import _get_chunk, _maybe_chunk From 2716ea4587a66a4ac74b9d8a54be6b9f4afe324f Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Wed, 2 Jul 2025 17:49:33 +0800 Subject: [PATCH 31/43] Updated whats-new.rst --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 30a1c588c61..cd0badf8d26 100644 --- a/doc/whats-new.rst +++ 
b/doc/whats-new.rst @@ -14,6 +14,10 @@ New Features ~~~~~~~~~~~~ - Expose :py:class:`~xarray.indexes.RangeIndex`, and :py:class:`~xarray.indexes.CoordinateTransformIndex` as public api under the ``xarray.indexes`` namespace. By `Deepak Cherian `_. +- Added new argument in `xarray.open_mfdataset` to better handle the invalid files + {'raise', 'warn', 'ignore'}, default 'raise'. If 'raise', then invalid dataset will raise an exception. + If 'ignore', then invalid dataset will be ignored. If 'warn', then a warning will be issued for each invalid dataset. + By `Pratiman Patel `_. Breaking changes ~~~~~~~~~~~~~~~~ From 0ce9a1e7ef0ebba8f6db22809cd3851d922d9497 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Jul 2025 09:50:06 +0000 Subject: [PATCH 32/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cd0badf8d26..eff108425f1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,7 +14,7 @@ New Features ~~~~~~~~~~~~ - Expose :py:class:`~xarray.indexes.RangeIndex`, and :py:class:`~xarray.indexes.CoordinateTransformIndex` as public api under the ``xarray.indexes`` namespace. By `Deepak Cherian `_. -- Added new argument in `xarray.open_mfdataset` to better handle the invalid files +- Added new argument in `xarray.open_mfdataset` to better handle the invalid files {'raise', 'warn', 'ignore'}, default 'raise'. If 'raise', then invalid dataset will raise an exception. If 'ignore', then invalid dataset will be ignored. If 'warn', then a warning will be issued for each invalid dataset. By `Pratiman Patel `_. From 1f395274c603bea78f4af0819902db5103639855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 2 Jul 2025 11:53:28 +0200 Subject: [PATCH 33/43] Update doc/whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index eff108425f1..51fa9a11423 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,7 +14,7 @@ New Features ~~~~~~~~~~~~ - Expose :py:class:`~xarray.indexes.RangeIndex`, and :py:class:`~xarray.indexes.CoordinateTransformIndex` as public api under the ``xarray.indexes`` namespace. By `Deepak Cherian `_. -- Added new argument in `xarray.open_mfdataset` to better handle the invalid files +- Added new argument in ``xarray.open_mfdataset`` to better handle the invalid files {'raise', 'warn', 'ignore'}, default 'raise'. If 'raise', then invalid dataset will raise an exception. If 'ignore', then invalid dataset will be ignored. If 'warn', then a warning will be issued for each invalid dataset. By `Pratiman Patel `_. From 7bddbc40166b5275f5a9b809f1ad290809858717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Fri, 4 Jul 2025 06:45:42 +0200 Subject: [PATCH 34/43] Update whats-new.rst --- doc/whats-new.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index afeba17e974..8a80296f6fd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -12,6 +12,10 @@ v2025.07.1 (unreleased) New Features ~~~~~~~~~~~~ +- Added new argument in ``xarray.open_mfdataset`` to better handle the invalid files + {'raise', 'warn', 'ignore'}, default 'raise'. If 'raise', then invalid dataset will raise an exception. 
+ If 'ignore', then invalid dataset will be ignored. If 'warn', then a warning will be issued for each invalid dataset. + By `Pratiman Patel `_. Breaking changes @@ -49,10 +53,6 @@ New Features - Expose :py:class:`~xarray.indexes.RangeIndex`, and :py:class:`~xarray.indexes.CoordinateTransformIndex` as public api under the ``xarray.indexes`` namespace. By `Deepak Cherian `_. -- Added new argument in ``xarray.open_mfdataset`` to better handle the invalid files - {'raise', 'warn', 'ignore'}, default 'raise'. If 'raise', then invalid dataset will raise an exception. - If 'ignore', then invalid dataset will be ignored. If 'warn', then a warning will be issued for each invalid dataset. - By `Pratiman Patel `_. - Support zarr-python's new ``.supports_consolidated_metadata`` store property (:pull:`10457``). by `Tom Nicholas `_. - Better error messages when encoding data to be written to disk fails (:pull:`10464`). From 2af2ce39df84c0ec55dacbc7792459e2963732b1 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Wed, 9 Jul 2025 12:09:00 +0100 Subject: [PATCH 35/43] set of invalid files in a set and remove them only once --- doc/whats-new.rst | 4 +--- xarray/backends/api.py | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8a80296f6fd..d2e2d278c57 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -12,9 +12,7 @@ v2025.07.1 (unreleased) New Features ~~~~~~~~~~~~ -- Added new argument in ``xarray.open_mfdataset`` to better handle the invalid files - {'raise', 'warn', 'ignore'}, default 'raise'. If 'raise', then invalid dataset will raise an exception. - If 'ignore', then invalid dataset will be ignored. If 'warn', then a warning will be issued for each invalid dataset. +- Added exception handling for invalid files in ``xarray.open_mfdataset`` By `Pratiman Patel `_. diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 06b640ba619..5bbf0549b9f 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1390,8 +1390,8 @@ def open_groups( return groups - -def _remove_path(paths, path_to_remove) -> list: +FLike = TypeVar("FLike", bound=Union[str, ReadBuffer]) +def _remove_path(paths: NestedSequence[FLike], path_ro_remove: set[FLike]) -> NestedSequence[FLike] # Initialize an empty list to store the result result = [] @@ -1664,7 +1664,7 @@ def open_mfdataset( ) datasets = [] - remove_paths = False + invalid_paths = set() for p in paths1d: try: ds = open_(p, **open_kwargs) @@ -1676,8 +1676,14 @@ def open_mfdataset( emit_user_level_warning(f"Could not open {p} due to {e}. 
Ignoring.") # remove invalid paths if combine == "nested": - paths = _remove_path(paths, p) - remove_paths = True + invalid_paths.add(p) + + if invalid_paths and combine == "nested": + paths = _remove_path(paths, p) + + # Create new ids and paths based on removed items + combined_ids_paths = _infer_concat_order_from_positions(paths) + ids = list(combined_ids_paths.keys()) closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: @@ -1691,11 +1697,6 @@ def open_mfdataset( # Combine all datasets, closing them in case of a ValueError try: if combine == "nested": - # Create new ids and paths based on removed items - if remove_paths: - combined_ids_paths = _infer_concat_order_from_positions(paths) - ids = list(combined_ids_paths.keys()) - # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = _nested_combine( From 4241372ec54a92eb649a2f2411e0191a43e9f242 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Wed, 9 Jul 2025 16:24:13 +0100 Subject: [PATCH 36/43] modified the logic to remove invalid files. --- xarray/backends/api.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5bbf0549b9f..7d5d5dcebc3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -41,7 +41,7 @@ from xarray.core.datatree import DataTree from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees -from xarray.core.types import NetcdfWriteModes, ZarrWriteModes +from xarray.core.types import NetcdfWriteModes, ZarrWriteModes, ReadBuffer from xarray.core.utils import emit_user_level_warning, is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager @@ -1390,18 +1390,18 @@ def open_groups( return groups -FLike = TypeVar("FLike", bound=Union[str, ReadBuffer]) -def _remove_path(paths: NestedSequence[FLike], path_ro_remove: set[FLike]) -> NestedSequence[FLike] +_FLike = TypeVar("FLike", bound=Union[str, os.PathLike, ReadBuffer]) +def _remove_path(paths: NestedSequence[_FLike], paths_to_remove: set[_FLike]) -> NestedSequence[_FLike]: # Initialize an empty list to store the result result = [] for item in paths: if isinstance(item, list): # If the current item is a list, recursively call remove_elements on it - nested_result = _remove_path(item, path_to_remove) + nested_result = _remove_path(item, paths_to_remove) if nested_result: # Only add non-empty lists to avoid adding empty lists result.append(nested_result) - elif item not in path_to_remove: + elif item not in paths_to_remove: # Add the item to the result if it is not in the set of elements to remove result.append(item) @@ -1675,15 +1675,14 @@ def open_mfdataset( elif errors == "warn": emit_user_level_warning(f"Could not open {p} due to {e}. 
Ignoring.") # remove invalid paths - if combine == "nested": - invalid_paths.add(p) + invalid_paths.add(p) - if invalid_paths and combine == "nested": - paths = _remove_path(paths, p) - - # Create new ids and paths based on removed items - combined_ids_paths = _infer_concat_order_from_positions(paths) - ids = list(combined_ids_paths.keys()) + if invalid_paths: + paths = _remove_path(paths, invalid_paths) + if combine == "nested": + # Create new ids and paths based on removed items + combined_ids_paths = _infer_concat_order_from_positions(paths) + ids = list(combined_ids_paths.keys()) closers = [getattr_(ds, "_close") for ds in datasets] if preprocess is not None: From 8ff7593e5d4bc66537e40d9149aaf227315500e8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:27:33 +0000 Subject: [PATCH 37/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- xarray/backends/api.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5c38b886c11..e55722dbcc8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,7 +14,7 @@ New Features ~~~~~~~~~~~~ - Allow skipping the creation of default indexes when opening datasets (:pull:`8051`). By `Benoit Bovy `_ and `Justus Magin `_. -- Added exception handling for invalid files in ``xarray.open_mfdataset`` +- Added exception handling for invalid files in ``xarray.open_mfdataset`` By `Pratiman Patel `_. Breaking changes diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f3bbd708264..a58e38c26af 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -42,7 +42,7 @@ from xarray.core.datatree import DataTree from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees -from xarray.core.types import NetcdfWriteModes, ZarrWriteModes, ReadBuffer +from xarray.core.types import NetcdfWriteModes, ReadBuffer, ZarrWriteModes from xarray.core.utils import emit_user_level_warning, is_remote_uri from xarray.namedarray.daskmanager import DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager @@ -1445,8 +1445,13 @@ def open_groups( return groups -_FLike = TypeVar("FLike", bound=Union[str, os.PathLike, ReadBuffer]) -def _remove_path(paths: NestedSequence[_FLike], paths_to_remove: set[_FLike]) -> NestedSequence[_FLike]: + +_FLike = TypeVar("FLike", bound=Union[str, os.PathLike, ReadBuffer]) + + +def _remove_path( + paths: NestedSequence[_FLike], paths_to_remove: set[_FLike] +) -> NestedSequence[_FLike]: # Initialize an empty list to store the result result = [] @@ -1731,7 +1736,7 @@ def open_mfdataset( emit_user_level_warning(f"Could not open {p} due to {e}. 
Ignoring.") # remove invalid paths invalid_paths.add(p) - + if invalid_paths: paths = _remove_path(paths, invalid_paths) if combine == "nested": From 096a133d3dfd18dd8ce55e521a7d223174f07f15 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Wed, 9 Jul 2025 16:29:33 +0100 Subject: [PATCH 38/43] import ing TypeVar --- xarray/backends/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a58e38c26af..aa67e4e5028 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -21,6 +21,7 @@ Union, cast, overload, + TypeVar, ) import numpy as np From 87ebcf9be07297e10b55e63aa576fa6a355eff00 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:30:17 +0000 Subject: [PATCH 39/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index aa67e4e5028..888035bc49a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -18,10 +18,10 @@ Any, Final, Literal, + TypeVar, Union, cast, overload, - TypeVar, ) import numpy as np From 229228ee337c436a8bfe592f1be350227c1931be Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Wed, 9 Jul 2025 16:38:10 +0100 Subject: [PATCH 40/43] making FLike private --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 888035bc49a..f78900d489b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1447,7 +1447,7 @@ def open_groups( return groups -_FLike = TypeVar("FLike", bound=Union[str, os.PathLike, ReadBuffer]) +_FLike = TypeVar("_FLike", bound=Union[str, os.PathLike, ReadBuffer]) def _remove_path( From 7929dd34ab1493357f4165bb155ab94ceb3c6bd1 Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Wed, 9 Jul 2025 16:52:26 +0100 Subject: [PATCH 41/43] fixing mypy errors --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f78900d489b..d69c671e710 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1454,7 +1454,7 @@ def _remove_path( paths: NestedSequence[_FLike], paths_to_remove: set[_FLike] ) -> NestedSequence[_FLike]: # Initialize an empty list to store the result - result = [] + result: List[Union[_FLike, NestedSequence[_FLike]]] = [] for item in paths: if isinstance(item, list): From 1fbc34faf542b3b703f08cc94717f1cbd5130d4c Mon Sep 17 00:00:00 2001 From: Pratiman <31694629+pratiman-91@users.noreply.github.com> Date: Wed, 9 Jul 2025 17:00:15 +0100 Subject: [PATCH 42/43] importing List from typing --- xarray/backends/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d69c671e710..77222897ae0 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -22,6 +22,7 @@ Union, cast, overload, + List ) import numpy as np @@ -1447,7 +1448,7 @@ def open_groups( return groups -_FLike = TypeVar("_FLike", bound=Union[str, os.PathLike, ReadBuffer]) +_FLike = TypeVar("_FLike", bound=Union[str, ReadBuffer]) def _remove_path( From cbdb290962372953294f95dba7b5be713e829a3c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 16:00:38 +0000 Subject: [PATCH 43/43] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 77222897ae0..3674ed718b3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -22,7 +22,6 @@ Union, cast, overload, - List ) import numpy as np @@ -1455,7 +1454,7 @@ def _remove_path( paths: NestedSequence[_FLike], paths_to_remove: set[_FLike] ) -> NestedSequence[_FLike]: # Initialize an empty list to store the result - result: List[Union[_FLike, NestedSequence[_FLike]]] = [] + result: list[Union[_FLike, NestedSequence[_FLike]]] = [] for item in paths: if isinstance(item, list):
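For reference, a minimal usage sketch of the ``errors`` option this patch series adds to ``open_mfdataset`` (file names below are hypothetical; the semantics follow the docstring added in the series: ``"raise"`` keeps the previous behaviour, ``"warn"`` skips unreadable files with a warning, ``"ignore"`` skips them silently):

# Sketch of the ``errors`` keyword added by this series; "good_*.nc" and
# "broken.nc" are hypothetical file names used only for illustration.
import xarray as xr

# Default behaviour: any unreadable file raises, exactly as before.
ds = xr.open_mfdataset(["good_1.nc", "good_2.nc"], combine="nested", concat_dim="x")

# Skip unreadable files, emitting a warning for each file that is dropped.
ds = xr.open_mfdataset(
    ["good_1.nc", "broken.nc", "good_2.nc"],
    combine="nested",
    concat_dim="x",
    errors="warn",
)

# Skip unreadable files silently.
ds = xr.open_mfdataset(
    ["good_1.nc", "broken.nc", "good_2.nc"],
    combine="nested",
    concat_dim="x",
    errors="ignore",
)

# Any other value is rejected with a ValueError
# ("'errors' must be 'raise', 'warn' or 'ignore', ...").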
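The ``combine="nested"`` handling in the series works by dropping the unreadable paths from the (possibly nested) path list before the concatenation order is re-inferred. A self-contained sketch of that filtering step, independent of xarray internals (the helper name and the file names are illustrative only):

# Illustrative re-implementation of the path-filtering step used for
# combine="nested": recursively drop unreadable paths while preserving the
# nesting structure; sub-lists that become empty are dropped as well.
def drop_paths(paths, paths_to_drop):
    result = []
    for item in paths:
        if isinstance(item, list):
            kept = drop_paths(item, paths_to_drop)
            if kept:  # avoid leaving empty sub-lists behind
                result.append(kept)
        elif item not in paths_to_drop:
            result.append(item)
    return result

# Example: the 2x2 nested layout from the tests, with one bad file removed.
nested = [["a.nc", "b.nc", "bad.nc"], ["c.nc", "d.nc"]]
print(drop_paths(nested, {"bad.nc"}))  # [['a.nc', 'b.nc'], ['c.nc', 'd.nc']]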