From 243c95c23ee667dd55b99b14952301f5c1e36481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Tue, 17 Jun 2025 16:02:15 +0200 Subject: [PATCH 01/15] Group decoding options into single argument --- doc/api-hidden.rst | 2 + doc/api.rst | 1 + doc/internals/how-to-add-new-backend.rst | 46 +- xarray/backends/__init__.py | 8 +- xarray/backends/api.py | 776 ++++++++++++----------- xarray/backends/common.py | 142 ++++- xarray/backends/h5netcdf_.py | 100 +-- xarray/backends/netCDF4_.py | 74 +-- xarray/backends/plugins.py | 2 +- xarray/backends/pydap_.py | 57 +- xarray/backends/scipy_.py | 27 +- xarray/backends/store.py | 25 +- xarray/backends/zarr.py | 221 +++---- xarray/conventions.py | 2 +- xarray/tests/test_backends.py | 17 +- xarray/tests/test_backends_api.py | 2 +- xarray/tests/test_backends_datatree.py | 15 +- xarray/tests/test_plugins.py | 6 +- 18 files changed, 769 insertions(+), 754 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 9a6037cf3c4..5241d060e9c 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -680,6 +680,8 @@ backends.BackendArray backends.BackendEntrypoint.guess_can_open backends.BackendEntrypoint.open_dataset + backends.CoderOptions + backends.CoderOptions.to_kwargs core.indexing.IndexingSupport core.indexing.explicit_indexing_adapter diff --git a/doc/api.rst b/doc/api.rst index b6023866eb8..1ac5cd1ed79 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1685,6 +1685,7 @@ Advanced API Dataset.set_close backends.BackendArray backends.BackendEntrypoint + backends.CoderOptions backends.list_engines backends.refresh_engines diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index d3b5c3a9267..576ce00e7f9 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -38,21 +38,23 @@ This is what a ``BackendEntrypoint`` subclass should look like: .. code-block:: python - from xarray.backends import BackendEntrypoint + from xarray.backends import BackendEntrypoint, CoderOptions class MyBackendEntrypoint(BackendEntrypoint): + coder_class = CoderOptions + def open_dataset( self, filename_or_obj, *, - drop_variables=None, + coder_options=None, # other backend specific keyword arguments # `chunks` and `cache` DO NOT go here, they are handled by xarray ): - return my_open_dataset(filename_or_obj, drop_variables=drop_variables) + return my_open_dataset(filename_or_obj, coder_options=coder_options) - open_dataset_parameters = ["filename_or_obj", "drop_variables"] + open_dataset_parameters = ["filename_or_obj", "coder_options"] def guess_can_open(self, filename_or_obj): try: @@ -83,19 +85,15 @@ The following is an example of the high level processing steps: self, filename_or_obj, *, - drop_variables=None, - decode_times=True, - decode_timedelta=True, - decode_coords=True, + coder_options=None, my_backend_option=None, ): vars, attrs, coords = my_reader( filename_or_obj, - drop_variables=drop_variables, my_backend_option=my_backend_option, ) vars, attrs, coords = my_decode_variables( - vars, attrs, decode_times, decode_timedelta, decode_coords + vars, attrs, **coder_options.to_kwargs() ) # see also conventions.decode_cf_variables ds = xr.Dataset(vars, attrs=attrs, coords=coords) @@ -110,16 +108,13 @@ method shall be set by using :py:meth:`~xarray.Dataset.set_close`. 
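+The following sketch illustrates the two pieces used above. First,
+:py:meth:`~xarray.backends.CoderOptions.to_kwargs` returns only the options
+that were explicitly set (i.e. are not ``None``) as a plain dict, so a decode
+function can consume them as keyword arguments:
+
+.. code-block:: python
+
+    from xarray.backends import CoderOptions
+
+    opts = CoderOptions(mask_and_scale=True, decode_times=False)
+    opts.to_kwargs()  # {"mask_and_scale": True, "decode_times": False}
+
+Second, a backend that supports only some decoders can declare a custom
+``coder_class``. A minimal sketch, assuming the subclass keeps the frozen,
+keyword-only dataclass layout of ``CoderOptions`` (``decode_my_units`` is a
+hypothetical backend-specific flag, not part of this patch):
+
+.. code-block:: python
+
+    from dataclasses import dataclass
+
+    from xarray.backends import BackendEntrypoint, CoderOptions
+
+
+    @dataclass(frozen=True, kw_only=True)
+    class MyCoderOptions(CoderOptions):
+        # Hypothetical extra decoder flag; defaults to None so the default
+        # instantiation in BackendEntrypoint.__init__ still works.
+        decode_my_units: bool | None = None
+
+
+    class MyBackendEntrypoint(BackendEntrypoint):
+        coder_class = MyCoderOptions
+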
The input of the ``open_dataset`` method is one argument
-(``filename_or_obj``) and one keyword argument (``drop_variables``):
+(``filename_or_obj``) and one keyword argument (``coder_options``):

- ``filename_or_obj``: can be any object but usually it is a string containing
  a path or an instance of :py:class:`pathlib.Path`.
-- ``drop_variables``: can be ``None`` or an iterable containing the variable
-  names to be dropped when reading the data.
+- ``coder_options``: can be ``None`` or an instance of
+  :py:class:`~xarray.backends.CoderOptions`.

-If it makes sense for your backend, your ``open_dataset`` method
-should implement in its interface the following boolean keyword arguments, called
-**decoders**, which default to ``None``:
+If it makes sense for your backend, you can override the following
+``CoderOptions`` fields, which default to ``None``:

- ``mask_and_scale``
- ``decode_times``
@@ -127,12 +122,12 @@ should implement in its interface the following boolean keyword arguments, calle
- ``use_cftime``
- ``concat_characters``
- ``decode_coords``
+- ``drop_variables``

-Note: all the supported decoders shall be declared explicitly
-in backend ``open_dataset`` signature and adding a ``**kwargs`` is not allowed.
+Note: if ``coder_options`` is ``None``, any decoding keyword arguments given to
+``open_dataset`` are validated against the fields of the backend's default
+``CoderOptions``.

These keyword arguments are explicitly defined in Xarray
-:py:func:`~xarray.open_dataset` signature. Xarray will pass them to the
+:py:class:`~xarray.backends.CoderOptions` or a subclass of it. Xarray will pass them to the
backend only if the User explicitly sets a value different from ``None``.
For more details on decoders see :ref:`RST decoders`.

@@ -141,7 +136,6 @@ arguments. All these keyword arguments can be passed to
:py:func:`~xarray.open_dataset` grouped either via the ``backend_kwargs``
parameter or explicitly using the syntax ``**kwargs``.
-
If you don't want to support the lazy loading, then the
:py:class:`~xarray.Dataset` shall contain values as a :py:class:`numpy.ndarray`
and your work is almost done.
@@ -260,14 +254,16 @@ time is stored in two attributes dataDate and dataTime as strings. Therefore,
it is not possible to reuse the Xarray time decoder, and implementing a new
one is mandatory.

-Decoders can be activated or deactivated using the boolean keywords of
-Xarray :py:meth:`~xarray.open_dataset` signature: ``mask_and_scale``,
+Decoders can be activated or deactivated using the ``coder_options`` kwarg
+(:py:class:`~xarray.backends.CoderOptions`) or its equivalent boolean keywords in
+Xarray :py:meth:`~xarray.open_dataset` (``mask_and_scale``,
``decode_times``, ``decode_timedelta``, ``use_cftime``,
-``concat_characters``, ``decode_coords``.
+``concat_characters``, ``decode_coords``, ``drop_variables``).

Such keywords are passed to the backend only if the User sets a value
different from ``None``. Note that the backend does not necessarily have to
-implement all the decoders, but it shall declare in its ``open_dataset``
-interface only the boolean keywords related to the supported decoders.
+implement all the decoders, but it shall declare a ``coder_class`` on its
+``BackendEntrypoint`` subclass containing only the keywords related to
+the supported decoders.

.. _RST backend_registration:

diff --git a/xarray/backends/__init__.py b/xarray/backends/__init__.py
index e5df179716f..a68d56a3f61 100644
--- a/xarray/backends/__init__.py
+++ b/xarray/backends/__init__.py
@@ -4,7 +4,12 @@
formats. They should not be used directly, but rather through
Dataset objects.
""" -from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint +from xarray.backends.common import ( + AbstractDataStore, + BackendArray, + BackendEntrypoint, + CoderOptions, +) from xarray.backends.file_manager import ( CachingFileManager, DummyFileManager, @@ -24,6 +29,7 @@ "BackendArray", "BackendEntrypoint", "CachingFileManager", + "CoderOptions", "DummyFileManager", "FileManager", "H5NetCDFStore", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 79deaed927d..c6d3dba7262 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -9,6 +9,7 @@ MutableMapping, Sequence, ) +from dataclasses import fields from functools import partial from io import BytesIO from numbers import Number @@ -29,11 +30,12 @@ from xarray.backends.common import ( AbstractDataStore, ArrayWriter, + CoderOptions, _find_absolute_paths, _normalize_path, + _reset_dataclass_to_false, ) from xarray.backends.locks import _get_scheduler -from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import indexing from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset @@ -388,6 +390,7 @@ def _dataset_from_backend_dataset( inline_array, chunked_array_type, from_array_kwargs, + coder_options, **extra_tokens, ): if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}: @@ -395,6 +398,8 @@ def _dataset_from_backend_dataset( f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}." ) + extra_tokens.update(**coder_options.to_kwargs()) + _protect_dataset_variables_inplace(backend_ds, cache) if chunks is None: ds = backend_ds @@ -433,6 +438,7 @@ def _datatree_from_backend_datatree( inline_array, chunked_array_type, from_array_kwargs, + coder_options, **extra_tokens, ): if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}: @@ -440,6 +446,8 @@ def _datatree_from_backend_datatree( f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}." 
) + extra_tokens.update(**coder_options.to_kwargs()) + _protect_datatree_variables_inplace(backend_tree, cache) if chunks is None: tree = backend_tree @@ -476,30 +484,38 @@ def _datatree_from_backend_datatree( return tree +def _resolve_decoders_options(coder_options, backend, decoders): + # initialize CoderOptions with decoders if not given + # Deprecation Fallback + if coder_options is False: + coder_options = _reset_dataclass_to_false(backend.coder_options) + elif coder_options is True: + coder_options = backend.coder_options + elif coder_options is None: + decode_cf = decoders.pop("decode_cf", None) + if decode_cf is False: + coder_options = _reset_dataclass_to_false(backend.coder_options) + else: + field_names = {f.name for f in fields(backend.coder_class)} + coders = {} + for d in list(decoders): + if d in field_names: + coders[d] = decoders.pop(d) + coder_options = backend.coder_class(**coders) + return coder_options + + def open_dataset( filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, - decode_cf: bool | None = None, - mask_and_scale: bool | Mapping[str, bool] | None = None, - decode_times: bool - | CFDatetimeCoder - | Mapping[str, bool | CFDatetimeCoder] - | None = None, - decode_timedelta: bool - | CFTimedeltaCoder - | Mapping[str, bool | CFTimedeltaCoder] - | None = None, - use_cftime: bool | Mapping[str, bool] | None = None, - concat_characters: bool | Mapping[str, bool] | None = None, - decode_coords: Literal["coordinates", "all"] | bool | None = None, - drop_variables: str | Iterable[str] | None = None, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, + coder_options: Union[bool, CoderOptions, None] = None, **kwargs, ) -> Dataset: """Open and decode a dataset from a file or file-like object. @@ -539,76 +555,91 @@ def open_dataset( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - decode_cf : bool, optional - Whether to decode these variables, assuming they were saved according - to CF conventions. - mask_and_scale : bool or dict-like, optional - If True, replace array values equal to `_FillValue` with NA and scale - values according to the formula `original_values * scale_factor + - add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are - taken from variable attributes (if they exist). If the `_FillValue` or - `missing_value` attribute contains multiple values a warning will be - issued and all array values matching one of the multiple values will - be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_times : bool, CFDatetimeCoder or dict-like, optional - If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them - encoded as numbers. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. 
- decode_timedelta : bool, CFTimedeltaCoder, or dict-like, optional - If True, decode variables and coordinates with time units in - {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} - into timedelta objects. If False, leave them encoded as numbers. - If None (default), assume the same value of ``decode_times``; if - ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this - takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a - matching ``time_unit``. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - use_cftime: bool or dict-like, optional - Only relevant if encoded dates come from a standard calendar - (e.g. "gregorian", "proleptic_gregorian", "standard", or not - specified). If None (default), attempt to decode times to - ``np.datetime64[ns]`` objects; if this is not possible, decode times to - ``cftime.datetime`` objects. If True, always decode times to - ``cftime.datetime`` objects, regardless of whether or not they can be - represented using ``np.datetime64[ns]`` objects. If False, always - decode times to ``np.datetime64[ns]`` objects; if this is not possible - raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - - .. deprecated:: 2025.01.1 - Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. - - concat_characters : bool or dict-like, optional - If True, concatenate along the last dimension of character arrays to - form string arrays. Dimensions will only be concatenated over (and - removed) if they have no corresponding variable and if they are only - used as the last dimension of character arrays. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_coords : bool or {"coordinates", "all"}, optional - Controls which variables are set as coordinate variables: - - - "coordinates" or True: Set variables referred to in the - ``'coordinates'`` attribute of the datasets or individual variables - as coordinate variables. - - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and - other attributes as coordinate variables. - - Only existing variables can be set as coordinates. Missing variables - will be silently ignored. - drop_variables: str or iterable of str, optional - A variable or list of variables to exclude from being parsed from the - dataset. This may be useful to drop variables with problems or - inconsistent values. + coder_options : bool or CoderOptions, optional + Dataclass containing below keyword arguments to pass to cf decoding. If set, + overrides any given keyword arguments: + + - 'decode_cf' : bool, optional + Whether to decode these variables, assuming they were saved according + to CF conventions. + + - 'mask_and_scale' : bool or dict-like, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). 
If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_times' : bool, CFDatetimeCoder or dict-like, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them + encoded as numbers. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_timedelta' : bool, CFTimedeltaCoder, or dict-like, optional + If True, decode variables and coordinates with time units in + {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of ``decode_times``; if + ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this + takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a + matching ``time_unit``. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'use_cftime' : bool or dict-like, optional + Only relevant if encoded dates come from a standard calendar + (e.g. "gregorian", "proleptic_gregorian", "standard", or not + specified). If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. + + - 'concat_characters' : bool or dict-like, optional + If True, concatenate along the last dimension of character arrays to + form string arrays. Dimensions will only be concatenated over (and + removed) if they have no corresponding variable and if they are only + used as the last dimension of character arrays. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_coords' : bool or {"coordinates", "all"}, optional + Controls which variables are set as coordinate variables: + + - "coordinates" or True: Set variables referred to in the + ``'coordinates'`` attribute of the datasets or individual variables + as coordinate variables. + - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and + other attributes as coordinate variables. + + Only existing variables can be set as coordinates. Missing variables + will be silently ignored. + + - 'drop_variables' : str or iterable of str, optional + A variable or list of variables to exclude from being parsed from the + dataset. 
This may be useful to drop variables with problems or
+          inconsistent values.
+
+        .. versionadded:: 2025.06.2
+           The new keyword argument ``coder_options`` was added. For backwards
+           compatibility, the individual decoding options listed above can still
+           be given as separate keyword arguments.
+
    inline_array: bool, default: False
        How to include the array in the dask task graph.
        By default(``inline_array=False``) the array is included in a task by
        itself, and each chunk refers to that task by its key. With
        ``inline_array=True``, Dask will instead inline the array directly
        in the values of the task graph. See :py:func:`dask.array.from_array`.
    chunked_array_type: str, optional
        Which chunked array type to coerce this datasets' arrays to.
-        Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEnetryPoint` system.
+        Defaults to 'dask' if installed, else whatever is registered via the
+        `ChunkManagerEntryPoint` system.
        Experimental API that should not be relied upon.
    from_array_kwargs: dict
-        Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
-        chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
-        For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
-        to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
+        Additional keyword arguments passed on to the
+        `ChunkManagerEntrypoint.from_array` method used to create chunked arrays,
+        via whichever chunk manager is specified through the `chunked_array_type` kwarg.
+        For example if :py:func:`dask.array.Array` objects are used for chunking,
+        additional kwargs will be passed to :py:func:`dask.array.from_array`.
+        Experimental API that should not be relied upon.
    backend_kwargs: dict
        Additional keyword arguments passed on to the engine open function,
        equivalent to `**kwargs`.
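A minimal usage sketch of the grouped interface documented above ("my_file.nc"
is a hypothetical path): pass a ``CoderOptions`` instance, or keep using the
individual decoding keywords, which ``_resolve_decoders_options`` collects into
the backend's ``coder_class``:

.. code-block:: python

    import xarray as xr
    from xarray.backends import CoderOptions

    # Grouped: one frozen dataclass instead of several separate keywords.
    opts = CoderOptions(decode_times=True, decode_coords="all")
    ds = xr.open_dataset("my_file.nc", coder_options=opts)

    # Backwards compatible: the same options as plain keyword arguments.
    ds = xr.open_dataset("my_file.nc", decode_times=True, decode_coords="all")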
@@ -672,22 +706,13 @@ def open_dataset( backend = plugins.get_backend(engine) - decoders = _resolve_decoders_kwargs( - decode_cf, - open_backend_dataset_parameters=backend.open_dataset_parameters, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - decode_timedelta=decode_timedelta, - concat_characters=concat_characters, - use_cftime=use_cftime, - decode_coords=decode_coords, - ) + # initialize coder_options per kwargs if not given + coder_options = _resolve_decoders_options(coder_options, backend, kwargs) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) backend_ds = backend.open_dataset( filename_or_obj, - drop_variables=drop_variables, - **decoders, + coder_options=coder_options, **kwargs, ) ds = _dataset_from_backend_dataset( @@ -700,8 +725,7 @@ def open_dataset( inline_array, chunked_array_type, from_array_kwargs, - drop_variables=drop_variables, - **decoders, + coder_options=coder_options, **kwargs, ) return ds @@ -713,21 +737,11 @@ def open_dataarray( engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, - decode_cf: bool | None = None, - mask_and_scale: bool | None = None, - decode_times: bool - | CFDatetimeCoder - | Mapping[str, bool | CFDatetimeCoder] - | None = None, - decode_timedelta: bool | CFTimedeltaCoder | None = None, - use_cftime: bool | None = None, - concat_characters: bool | None = None, - decode_coords: Literal["coordinates", "all"] | bool | None = None, - drop_variables: str | Iterable[str] | None = None, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, + coder_options: Union[bool, CoderOptions, None] = None, **kwargs, ) -> DataArray: """Open an DataArray from a file or file-like object containing a single @@ -770,68 +784,91 @@ def open_dataarray( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - decode_cf : bool, optional - Whether to decode these variables, assuming they were saved according - to CF conventions. - mask_and_scale : bool, optional - If True, replace array values equal to `_FillValue` with NA and scale - values according to the formula `original_values * scale_factor + - add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are - taken from variable attributes (if they exist). If the `_FillValue` or - `missing_value` attribute contains multiple values a warning will be - issued and all array values matching one of the multiple values will - be replaced by NA. This keyword may not be supported by all the backends. - decode_times : bool, CFDatetimeCoder or dict-like, optional - If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or - leave them encoded as numbers. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_timedelta : bool, optional - If True, decode variables and coordinates with time units in - {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} - into timedelta objects. If False, leave them encoded as numbers. 
- If None (default), assume the same value of ``decode_times``; if - ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this - takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a - matching ``time_unit``. - This keyword may not be supported by all the backends. - use_cftime: bool, optional - Only relevant if encoded dates come from a standard calendar - (e.g. "gregorian", "proleptic_gregorian", "standard", or not - specified). If None (default), attempt to decode times to - ``np.datetime64[ns]`` objects; if this is not possible, decode times to - ``cftime.datetime`` objects. If True, always decode times to - ``cftime.datetime`` objects, regardless of whether or not they can be - represented using ``np.datetime64[ns]`` objects. If False, always - decode times to ``np.datetime64[ns]`` objects; if this is not possible - raise an error. This keyword may not be supported by all the backends. - - .. deprecated:: 2025.01.1 - Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. - - concat_characters : bool, optional - If True, concatenate along the last dimension of character arrays to - form string arrays. Dimensions will only be concatenated over (and - removed) if they have no corresponding variable and if they are only - used as the last dimension of character arrays. - This keyword may not be supported by all the backends. - decode_coords : bool or {"coordinates", "all"}, optional - Controls which variables are set as coordinate variables: - - - "coordinates" or True: Set variables referred to in the - ``'coordinates'`` attribute of the datasets or individual variables - as coordinate variables. - - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and - other attributes as coordinate variables. - - Only existing variables can be set as coordinates. Missing variables - will be silently ignored. - drop_variables: str or iterable of str, optional - A variable or list of variables to exclude from being parsed from the - dataset. This may be useful to drop variables with problems or - inconsistent values. + coder_options : bool or CoderOptions, optional + Dataclass containing below keyword arguments to pass to cf decoding. If set, + overrides any given keyword arguments: + + - 'decode_cf' : bool, optional + Whether to decode these variables, assuming they were saved according + to CF conventions. + + - 'mask_and_scale' : bool or dict-like, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_times' : bool, CFDatetimeCoder or dict-like, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them + encoded as numbers. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. 
+
+        - 'decode_timedelta' : bool, CFTimedeltaCoder, or dict-like, optional
+          If True, decode variables and coordinates with time units in
+          {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
+          into timedelta objects. If False, leave them encoded as numbers.
+          If None (default), assume the same value of ``decode_times``; if
+          ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this
+          takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a
+          matching ``time_unit``.
+          Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+        - 'use_cftime' : bool or dict-like, optional
+          Only relevant if encoded dates come from a standard calendar
+          (e.g. "gregorian", "proleptic_gregorian", "standard", or not
+          specified). If None (default), attempt to decode times to
+          ``np.datetime64[ns]`` objects; if this is not possible, decode times to
+          ``cftime.datetime`` objects. If True, always decode times to
+          ``cftime.datetime`` objects, regardless of whether or not they can be
+          represented using ``np.datetime64[ns]`` objects. If False, always
+          decode times to ``np.datetime64[ns]`` objects; if this is not possible
+          raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+          .. deprecated:: 2025.01.1
+             Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead.
+
+        - 'concat_characters' : bool or dict-like, optional
+          If True, concatenate along the last dimension of character arrays to
+          form string arrays. Dimensions will only be concatenated over (and
+          removed) if they have no corresponding variable and if they are only
+          used as the last dimension of character arrays.
+          Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+        - 'decode_coords' : bool or {"coordinates", "all"}, optional
+          Controls which variables are set as coordinate variables:
+
+          - "coordinates" or True: Set variables referred to in the
+            ``'coordinates'`` attribute of the datasets or individual variables
+            as coordinate variables.
+          - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
+            other attributes as coordinate variables.
+
+          Only existing variables can be set as coordinates. Missing variables
+          will be silently ignored.
+
+        - 'drop_variables' : str or iterable of str, optional
+          A variable or list of variables to exclude from being parsed from the
+          dataset. This may be useful to drop variables with problems or
+          inconsistent values.
+
+        .. versionadded:: 2025.06.2
+           The new keyword argument ``coder_options`` was added. For backwards
+           compatibility, the individual decoding options listed above can still
+           be given as separate keyword arguments.
+
    inline_array: bool, default: False
        How to include the array in the dask task graph.
By default(``inline_array=False``) the array is included in a task by @@ -880,21 +917,14 @@ def open_dataarray( dataset = open_dataset( filename_or_obj, - decode_cf=decode_cf, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, engine=engine, chunks=chunks, cache=cache, - drop_variables=drop_variables, inline_array=inline_array, chunked_array_type=chunked_array_type, from_array_kwargs=from_array_kwargs, backend_kwargs=backend_kwargs, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, **kwargs, ) @@ -931,24 +961,11 @@ def open_datatree( engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, - decode_cf: bool | None = None, - mask_and_scale: bool | Mapping[str, bool] | None = None, - decode_times: bool - | CFDatetimeCoder - | Mapping[str, bool | CFDatetimeCoder] - | None = None, - decode_timedelta: bool - | CFTimedeltaCoder - | Mapping[str, bool | CFTimedeltaCoder] - | None = None, - use_cftime: bool | Mapping[str, bool] | None = None, - concat_characters: bool | Mapping[str, bool] | None = None, - decode_coords: Literal["coordinates", "all"] | bool | None = None, - drop_variables: str | Iterable[str] | None = None, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, + coder_options: Union[bool, CoderOptions, None] = None, **kwargs, ) -> DataTree: """ @@ -984,76 +1001,92 @@ def open_datatree( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - decode_cf : bool, optional - Whether to decode these variables, assuming they were saved according - to CF conventions. - mask_and_scale : bool or dict-like, optional - If True, replace array values equal to `_FillValue` with NA and scale - values according to the formula `original_values * scale_factor + - add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are - taken from variable attributes (if they exist). If the `_FillValue` or - `missing_value` attribute contains multiple values a warning will be - issued and all array values matching one of the multiple values will - be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_times : bool, CFDatetimeCoder or dict-like, optional - If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or - leave them encoded as numbers. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_timedelta : bool or dict-like, optional - If True, decode variables and coordinates with time units in - {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} - into timedelta objects. If False, leave them encoded as numbers. - If None (default), assume the same value of ``decode_times``; if - ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this - takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a - matching ``time_unit``. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. 
- This keyword may not be supported by all the backends. - use_cftime: bool or dict-like, optional - Only relevant if encoded dates come from a standard calendar - (e.g. "gregorian", "proleptic_gregorian", "standard", or not - specified). If None (default), attempt to decode times to - ``np.datetime64[ns]`` objects; if this is not possible, decode times to - ``cftime.datetime`` objects. If True, always decode times to - ``cftime.datetime`` objects, regardless of whether or not they can be - represented using ``np.datetime64[ns]`` objects. If False, always - decode times to ``np.datetime64[ns]`` objects; if this is not possible - raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - - .. deprecated:: 2025.01.1 - Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. - - concat_characters : bool or dict-like, optional - If True, concatenate along the last dimension of character arrays to - form string arrays. Dimensions will only be concatenated over (and - removed) if they have no corresponding variable and if they are only - used as the last dimension of character arrays. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_coords : bool or {"coordinates", "all"}, optional - Controls which variables are set as coordinate variables: - - - "coordinates" or True: Set variables referred to in the - ``'coordinates'`` attribute of the datasets or individual variables - as coordinate variables. - - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and - other attributes as coordinate variables. - - Only existing variables can be set as coordinates. Missing variables - will be silently ignored. - drop_variables: str or iterable of str, optional - A variable or list of variables to exclude from being parsed from the - dataset. This may be useful to drop variables with problems or - inconsistent values. + coder_options : bool or CoderOptions, optional + Dataclass containing below keyword arguments to pass to cf decoding. If set, + overrides any given keyword arguments: + + - 'decode_cf' : bool, optional + Whether to decode these variables, assuming they were saved according + to CF conventions. + + - 'mask_and_scale' : bool or dict-like, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_times' : bool, CFDatetimeCoder or dict-like, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them + encoded as numbers. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. 
+
+        - 'decode_timedelta' : bool, CFTimedeltaCoder, or dict-like, optional
+          If True, decode variables and coordinates with time units in
+          {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
+          into timedelta objects. If False, leave them encoded as numbers.
+          If None (default), assume the same value of ``decode_times``; if
+          ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this
+          takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a
+          matching ``time_unit``.
+          Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+        - 'use_cftime' : bool or dict-like, optional
+          Only relevant if encoded dates come from a standard calendar
+          (e.g. "gregorian", "proleptic_gregorian", "standard", or not
+          specified). If None (default), attempt to decode times to
+          ``np.datetime64[ns]`` objects; if this is not possible, decode times to
+          ``cftime.datetime`` objects. If True, always decode times to
+          ``cftime.datetime`` objects, regardless of whether or not they can be
+          represented using ``np.datetime64[ns]`` objects. If False, always
+          decode times to ``np.datetime64[ns]`` objects; if this is not possible
+          raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+          .. deprecated:: 2025.01.1
+             Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead.
+
+        - 'concat_characters' : bool or dict-like, optional
+          If True, concatenate along the last dimension of character arrays to
+          form string arrays. Dimensions will only be concatenated over (and
+          removed) if they have no corresponding variable and if they are only
+          used as the last dimension of character arrays.
+          Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+        - 'decode_coords' : bool or {"coordinates", "all"}, optional
+          Controls which variables are set as coordinate variables:
+
+          - "coordinates" or True: Set variables referred to in the
+            ``'coordinates'`` attribute of the datasets or individual variables
+            as coordinate variables.
+          - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
+            other attributes as coordinate variables.
+
+          Only existing variables can be set as coordinates. Missing variables
+          will be silently ignored.
+
+        - 'drop_variables' : str or iterable of str, optional
+          A variable or list of variables to exclude from being parsed from the
+          dataset. This may be useful to drop variables with problems or
+          inconsistent values.
+
+        .. versionadded:: 2025.06.2
+           The new keyword argument ``coder_options`` was added. For backwards
+           compatibility, the individual decoding options listed above can still
+           be given as separate keyword arguments.
+
    inline_array: bool, default: False
        How to include the array in the dask task graph.
By default(``inline_array=False``) the array is included in a task by @@ -1117,22 +1150,12 @@ def open_datatree( backend = plugins.get_backend(engine) - decoders = _resolve_decoders_kwargs( - decode_cf, - open_backend_dataset_parameters=backend.open_dataset_parameters, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - decode_timedelta=decode_timedelta, - concat_characters=concat_characters, - use_cftime=use_cftime, - decode_coords=decode_coords, - ) + coder_options = _resolve_decoders_options(coder_options, backend, kwargs) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) backend_tree = backend.open_datatree( filename_or_obj, - drop_variables=drop_variables, - **decoders, + coder_options=coder_options, **kwargs, ) @@ -1146,8 +1169,7 @@ def open_datatree( inline_array, chunked_array_type, from_array_kwargs, - drop_variables=drop_variables, - **decoders, + coder_options, **kwargs, ) @@ -1160,24 +1182,11 @@ def open_groups( engine: T_Engine = None, chunks: T_Chunks = None, cache: bool | None = None, - decode_cf: bool | None = None, - mask_and_scale: bool | Mapping[str, bool] | None = None, - decode_times: bool - | CFDatetimeCoder - | Mapping[str, bool | CFDatetimeCoder] - | None = None, - decode_timedelta: bool - | CFTimedeltaCoder - | Mapping[str, bool | CFTimedeltaCoder] - | None = None, - use_cftime: bool | Mapping[str, bool] | None = None, - concat_characters: bool | Mapping[str, bool] | None = None, - decode_coords: Literal["coordinates", "all"] | bool | None = None, - drop_variables: str | Iterable[str] | None = None, inline_array: bool = False, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, + coder_options: Union[bool, CoderOptions, None] = None, **kwargs, ) -> dict[str, Dataset]: """ @@ -1217,74 +1226,91 @@ def open_groups( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - decode_cf : bool, optional - Whether to decode these variables, assuming they were saved according - to CF conventions. - mask_and_scale : bool or dict-like, optional - If True, replace array values equal to `_FillValue` with NA and scale - values according to the formula `original_values * scale_factor + - add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are - taken from variable attributes (if they exist). If the `_FillValue` or - `missing_value` attribute contains multiple values a warning will be - issued and all array values matching one of the multiple values will - be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_times : bool, CFDatetimeCoder or dict-like, optional - If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or - leave them encoded as numbers. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_timedelta : bool or dict-like, optional - If True, decode variables and coordinates with time units in - {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} - into timedelta objects. If False, leave them encoded as numbers. 
- If None (default), assume the same value of ``decode_times``; if - ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this - takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a - matching ``time_unit``. - This keyword may not be supported by all the backends. - use_cftime: bool or dict-like, optional - Only relevant if encoded dates come from a standard calendar - (e.g. "gregorian", "proleptic_gregorian", "standard", or not - specified). If None (default), attempt to decode times to - ``np.datetime64[ns]`` objects; if this is not possible, decode times to - ``cftime.datetime`` objects. If True, always decode times to - ``cftime.datetime`` objects, regardless of whether or not they can be - represented using ``np.datetime64[ns]`` objects. If False, always - decode times to ``np.datetime64[ns]`` objects; if this is not possible - raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - - .. deprecated:: 2025.01.1 - Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. - - concat_characters : bool or dict-like, optional - If True, concatenate along the last dimension of character arrays to - form string arrays. Dimensions will only be concatenated over (and - removed) if they have no corresponding variable and if they are only - used as the last dimension of character arrays. - Pass a mapping, e.g. ``{"my_variable": False}``, - to toggle this feature per-variable individually. - This keyword may not be supported by all the backends. - decode_coords : bool or {"coordinates", "all"}, optional - Controls which variables are set as coordinate variables: - - - "coordinates" or True: Set variables referred to in the - ``'coordinates'`` attribute of the datasets or individual variables - as coordinate variables. - - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and - other attributes as coordinate variables. - - Only existing variables can be set as coordinates. Missing variables - will be silently ignored. - drop_variables: str or iterable of str, optional - A variable or list of variables to exclude from being parsed from the - dataset. This may be useful to drop variables with problems or - inconsistent values. + coder_options : bool or CoderOptions, optional + Dataclass containing below keyword arguments to pass to cf decoding. If set, + overrides any given keyword arguments: + + - 'decode_cf' : bool, optional + Whether to decode these variables, assuming they were saved according + to CF conventions. + + - 'mask_and_scale' : bool or dict-like, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_times' : bool, CFDatetimeCoder or dict-like, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. 
Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them
+          encoded as numbers.
+          Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+        - 'decode_timedelta' : bool, CFTimedeltaCoder, or dict-like, optional
+          If True, decode variables and coordinates with time units in
+          {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
+          into timedelta objects. If False, leave them encoded as numbers.
+          If None (default), assume the same value of ``decode_times``; if
+          ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this
+          takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a
+          matching ``time_unit``.
+          Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+        - 'use_cftime' : bool or dict-like, optional
+          Only relevant if encoded dates come from a standard calendar
+          (e.g. "gregorian", "proleptic_gregorian", "standard", or not
+          specified). If None (default), attempt to decode times to
+          ``np.datetime64[ns]`` objects; if this is not possible, decode times to
+          ``cftime.datetime`` objects. If True, always decode times to
+          ``cftime.datetime`` objects, regardless of whether or not they can be
+          represented using ``np.datetime64[ns]`` objects. If False, always
+          decode times to ``np.datetime64[ns]`` objects; if this is not possible
+          raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+          .. deprecated:: 2025.01.1
+             Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead.
+
+        - 'concat_characters' : bool or dict-like, optional
+          If True, concatenate along the last dimension of character arrays to
+          form string arrays. Dimensions will only be concatenated over (and
+          removed) if they have no corresponding variable and if they are only
+          used as the last dimension of character arrays.
+          Pass a mapping, e.g. ``{"my_variable": False}``,
+          to toggle this feature per-variable individually.
+          This keyword may not be supported by all the backends.
+
+        - 'decode_coords' : bool or {"coordinates", "all"}, optional
+          Controls which variables are set as coordinate variables:
+
+          - "coordinates" or True: Set variables referred to in the
+            ``'coordinates'`` attribute of the datasets or individual variables
+            as coordinate variables.
+          - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
+            other attributes as coordinate variables.
+
+          Only existing variables can be set as coordinates. Missing variables
+          will be silently ignored.
+
+        - 'drop_variables' : str or iterable of str, optional
+          A variable or list of variables to exclude from being parsed from the
+          dataset. This may be useful to drop variables with problems or
+          inconsistent values.
+
+        .. versionadded:: 2025.06.2
+           The new keyword argument ``coder_options`` was added. For backwards
+           compatibility, the individual decoding options listed above can still
+           be given as separate keyword arguments.
+
    inline_array: bool, default: False
        How to include the array in the dask task graph.
By default(``inline_array=False``) the array is included in a task by @@ -1349,23 +1375,12 @@ def open_groups( backend = plugins.get_backend(engine) - decoders = _resolve_decoders_kwargs( - decode_cf, - open_backend_dataset_parameters=(), - mask_and_scale=mask_and_scale, - decode_times=decode_times, - decode_timedelta=decode_timedelta, - concat_characters=concat_characters, - use_cftime=use_cftime, - decode_coords=decode_coords, - ) - overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) + coder_options = _resolve_decoders_options(coder_options, backend, kwargs) + overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) backend_groups = backend.open_groups_as_dict( filename_or_obj, - drop_variables=drop_variables, - **decoders, - **kwargs, + coder_options=coder_options, ) groups = { @@ -1379,8 +1394,7 @@ def open_groups( inline_array, chunked_array_type, from_array_kwargs, - drop_variables=drop_variables, - **decoders, + coder_options, **kwargs, ) for name, backend_ds in backend_groups.items() diff --git a/xarray/backends/common.py b/xarray/backends/common.py index e574f19e9d4..b7d50def671 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -5,13 +5,24 @@ import time import traceback from collections.abc import Hashable, Iterable, Mapping, Sequence +from dataclasses import dataclass, fields, replace from glob import glob -from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, Union, overload +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Literal, + Optional, + TypeVar, + Union, + overload, +) import numpy as np import pandas as pd from xarray.coding import strings, variables +from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.coding.variables import SerializationWarning from xarray.conventions import cf_encoder from xarray.core import indexing @@ -646,6 +657,119 @@ def encode(self, variables, attributes): return variables, attributes +def _reset_dataclass_to_false(instance): + # Returns instance with all elements set to False + field_names = [f.name for f in fields(instance)] + false_values = dict.fromkeys(field_names, False) + return replace(instance, **false_values) + + +def _validate_kwargs_for_dataclass(cls, kwargs): + valid_fields = {f.name for f in fields(cls)} + invalid = {k: v for k, v in kwargs.items() if k not in valid_fields} + return invalid + + +@dataclass(frozen=True, kw_only=True) +class CoderOptions: + """ + CF Coding Options. + + Parameters + ---------- + mask_and_scale : bool or dict-like, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + decode_times : bool, CFDatetimeCoder or dict-like, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them + encoded as numbers. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. 
+ This keyword may not be supported by all the backends. + + decode_timedelta : bool, CFTimedeltaCoder, or dict-like, optional + If True, decode variables and coordinates with time units in + {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of ``decode_times``; if + ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this + takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a + matching ``time_unit``. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + use_cftime : bool or dict-like, optional + Only relevant if encoded dates come from a standard calendar + (e.g. "gregorian", "proleptic_gregorian", "standard", or not + specified). If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. + + concat_characters : bool or dict-like, optional + If True, concatenate along the last dimension of character arrays to + form string arrays. Dimensions will only be concatenated over (and + removed) if they have no corresponding variable and if they are only + used as the last dimension of character arrays. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + decode_coords : bool or {"coordinates", "all"}, optional + Controls which variables are set as coordinate variables: + + - "coordinates" or True: Set variables referred to in the + ``'coordinates'`` attribute of the datasets or individual variables + as coordinate variables. + - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and + other attributes as coordinate variables. + + Only existing variables can be set as coordinates. Missing variables + will be silently ignored. + + drop_variables : str or iterable of str, optional + A variable or list of variables to exclude from being parsed from the + dataset. This may be useful to drop variables with problems or + inconsistent values. + """ + + # Todo: maybe add these two to disentangle masking from scaling? 
+ # mask: Optional[bool] = None + # scale: Optional[bool] = None + mask_and_scale: Optional[bool | Mapping[str, bool]] = None + decode_times: Optional[ + bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] + ] = None + decode_timedelta: Optional[ + bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] + ] = None + use_cftime: Optional[bool | Mapping[str, bool]] = None + concat_characters: Optional[bool | Mapping[str, bool]] = None + decode_coords: Optional[Literal["coordinates", "all"] | bool] = None + drop_variables: Optional[str | Iterable[str]] = None + + def to_kwargs(self): + return {k: v for k, v in vars(self).items() if v is not None} + + class BackendEntrypoint: """ ``BackendEntrypoint`` is a class container and it is the main interface @@ -683,6 +807,16 @@ class BackendEntrypoint: open_dataset_parameters: ClassVar[tuple | None] = None description: ClassVar[str] = "" url: ClassVar[str] = "" + coder_class = CoderOptions + + def __init__( + self, + coder_options: Optional[CoderOptions] = None, + ): + # Instantiate default coder_options + self.coder_options = ( + coder_options if coder_options is not None else self.coder_class() + ) def __repr__(self) -> str: txt = f"<{type(self).__name__}>" @@ -696,7 +830,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - drop_variables: str | Iterable[str] | None = None, + coder_options: Union[bool, CoderOptions, None] = None, ) -> Dataset: """ Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. @@ -718,7 +852,7 @@ def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - drop_variables: str | Iterable[str] | None = None, + coder_options: Union[bool, CoderOptions, None] = None, ) -> DataTree: """ Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`. @@ -730,7 +864,7 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - drop_variables: str | Iterable[str] | None = None, + coder_options: Union[bool, CoderOptions, None] = None, ) -> dict[str, Dataset]: """ Opens a dictionary mapping from group names to Datasets. diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index ba3a6d20e37..19197d4aad5 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,14 +3,14 @@ import functools import io import os -from collections.abc import Iterable -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional import numpy as np from xarray.backends.common import ( BACKEND_ENTRYPOINTS, BackendEntrypoint, + CoderOptions, WritableCFDataStore, _normalize_path, _open_remote_file, @@ -28,6 +28,7 @@ _get_datatype, _nc4_require_group, ) +from xarray.backends.netCDF4_ import NetCDF4CoderOptions as H5netcdfCoderOptions from xarray.backends.store import StoreBackendEntrypoint from xarray.core import indexing from xarray.core.utils import ( @@ -371,23 +372,6 @@ def close(self, **kwargs): self._manager.close(**kwargs) -def _check_phony_dims(phony_dims): - emit_phony_dims_warning = False - if phony_dims is None: - emit_phony_dims_warning = True - phony_dims = "access" - return emit_phony_dims_warning, phony_dims - - -def _emit_phony_dims_warning(): - emit_user_level_warning( - "The 'phony_dims' kwarg now defaults to 'access'. " - "Previously 'phony_dims=None' would raise an error. 
" - "For full netcdf equivalence please use phony_dims='sort'.", - UserWarning, - ) - - class H5netcdfBackendEntrypoint(BackendEntrypoint): """ Backend for netCDF files based on the h5netcdf package. @@ -410,6 +394,8 @@ class H5netcdfBackendEntrypoint(BackendEntrypoint): backends.ScipyBackendEntrypoint """ + coder_class = H5netcdfCoderOptions + description = ( "Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray" ) @@ -433,27 +419,21 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, format=None, group=None, lock=None, invalid_netcdf=None, - phony_dims=None, + phony_dims="access", decode_vlen_strings=True, driver=None, driver_kwds=None, storage_options: dict[str, Any] | None = None, + coder_options: Optional[CoderOptions] = None, + **kwargs, ) -> Dataset: - # Keep this message for some versions - # remove and set phony_dims="access" above - emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims) - + coder_options = ( + coder_options if coder_options is not None else self.coder_options + ) filename_or_obj = _normalize_path(filename_or_obj) store = H5NetCDFStore.open( filename_or_obj, @@ -472,53 +452,28 @@ def open_dataset( ds = store_entrypoint.open_dataset( store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) - # only warn if phony_dims exist in file - # remove together with the above check - # after some versions - if store.ds._root._phony_dim_count > 0 and emit_phony_dims_warning: - _emit_phony_dims_warning() - return ds def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, format=None, group: str | None = None, lock=None, invalid_netcdf=None, - phony_dims=None, + phony_dims="access", decode_vlen_strings=True, driver=None, driver_kwds=None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( filename_or_obj, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, format=format, group=group, lock=lock, @@ -527,6 +482,7 @@ def open_datatree( decode_vlen_strings=decode_vlen_strings, driver=driver, driver_kwds=driver_kwds, + coder_options=coder_options, **kwargs, ) @@ -536,13 +492,6 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, format=None, group: str | None = None, lock=None, @@ -551,16 +500,13 @@ def open_groups_as_dict( decode_vlen_strings=True, driver=None, driver_kwds=None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import 
_iter_nc_groups from xarray.core.treenode import NodePath from xarray.core.utils import close_on_error - # Keep this message for some versions - # remove and set phony_dims="access" above - emit_phony_dims_warning, phony_dims = _check_phony_dims(phony_dims) - filename_or_obj = _normalize_path(filename_or_obj) store = H5NetCDFStore.open( filename_or_obj, @@ -588,13 +534,7 @@ def open_groups_as_dict( with close_on_error(group_store): group_ds = store_entrypoint.open_dataset( group_store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) if group: @@ -603,12 +543,6 @@ def open_groups_as_dict( group_name = str(NodePath(path_group)) groups_dict[group_name] = group_ds - # only warn if phony_dims exist in file - # remove together with the above check - # after some versions - if store.ds._phony_dim_count > 0 and emit_phony_dims_warning: - _emit_phony_dims_warning() - return groups_dict diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index a23d247b6c3..c9070d20e43 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -3,9 +3,10 @@ import functools import operator import os -from collections.abc import Iterable +from collections.abc import Mapping from contextlib import suppress -from typing import TYPE_CHECKING, Any +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Literal, Optional import numpy as np @@ -14,6 +15,7 @@ BACKEND_ENTRYPOINTS, BackendArray, BackendEntrypoint, + CoderOptions, WritableCFDataStore, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -30,6 +32,7 @@ ) from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable from xarray.backends.store import StoreBackendEntrypoint +from xarray.coding.times import CFDatetimeCoder from xarray.coding.variables import pop_to from xarray.core import indexing from xarray.core.utils import ( @@ -597,6 +600,17 @@ def close(self, **kwargs): self._manager.close(**kwargs) +@dataclass(frozen=True) +class NetCDF4CoderOptions(CoderOptions): + # defaults for netcdf4 based backends + mask_and_scale: Optional[bool | Mapping[str, bool]] = True + decode_times: Optional[ + bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] + ] = True + concat_characters: Optional[bool | Mapping[str, bool]] = True + decode_coords: Optional[Literal["coordinates", "all"] | bool] = True + + class NetCDF4BackendEntrypoint(BackendEntrypoint): """ Backend for netCDF files based on the netCDF4 package. 
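
A minimal sketch of how the ``NetCDF4CoderOptions`` defaults added in the
hunk above behave (illustrative only; it assumes the class stays importable
from ``xarray.backends.netCDF4_`` and is not exercised by this series' tests):

    from xarray.backends.netCDF4_ import NetCDF4CoderOptions

    # Override only what differs from the netCDF4 defaults; fields left at
    # None (decode_timedelta, use_cftime, drop_variables) are dropped by
    # to_kwargs(), so the decode step receives only explicitly set options.
    opts = NetCDF4CoderOptions(decode_times=False)
    assert opts.to_kwargs() == {
        "mask_and_scale": True,
        "decode_times": False,
        "concat_characters": True,
        "decode_coords": True,
    }
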
@@ -619,6 +633,8 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint): backends.ScipyBackendEntrypoint """ + coder_class = NetCDF4CoderOptions + description = ( "Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray" ) @@ -645,15 +661,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group=None, - mode="r", format="NETCDF4", clobber=True, diskless=False, @@ -661,11 +669,14 @@ def open_dataset( auto_complex=None, lock=None, autoclose=False, + coder_options: Optional[CoderOptions] = None, ) -> Dataset: + coder_options = ( + coder_options if coder_options is not None else self.coder_options + ) filename_or_obj = _normalize_path(filename_or_obj) store = NetCDF4DataStore.open( filename_or_obj, - mode=mode, format=format, group=group, clobber=clobber, @@ -680,13 +691,7 @@ def open_dataset( with close_on_error(store): ds = store_entrypoint.open_dataset( store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) return ds @@ -694,13 +699,6 @@ def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group: str | None = None, format="NETCDF4", clobber=True, @@ -709,17 +707,11 @@ def open_datatree( auto_complex=None, lock=None, autoclose=False, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( filename_or_obj, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, group=group, format=format, clobber=clobber, @@ -727,6 +719,7 @@ def open_datatree( persist=persist, lock=lock, autoclose=autoclose, + coder_options=coder_options, **kwargs, ) @@ -736,13 +729,6 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group: str | None = None, format="NETCDF4", clobber=True, @@ -751,6 +737,7 @@ def open_groups_as_dict( auto_complex=None, lock=None, autoclose=False, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups @@ -767,7 +754,6 @@ def open_groups_as_dict( lock=lock, autoclose=autoclose, ) - # Check for a group and make it a parent if it exists if group: parent = NodePath("/") / NodePath(group) @@ -782,13 +768,7 @@ def open_groups_as_dict( with close_on_error(group_store): group_ds = store_entrypoint.open_dataset( group_store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + 
coder_options=coder_options, ) if group: group_name = str(NodePath(path_group).relative_to(parent)) diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 555538c2562..841f5a7181c 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -51,7 +51,7 @@ def detect_parameters(open_dataset: Callable) -> tuple[str, ...]: parameters_list = [] for name, param in parameters.items(): if param.kind in ( - inspect.Parameter.VAR_KEYWORD, + # inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL, ): raise TypeError( diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 301ea430c4c..f54a07d9a9d 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -1,7 +1,6 @@ from __future__ import annotations -from collections.abc import Iterable -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional import numpy as np @@ -10,6 +9,7 @@ AbstractDataStore, BackendArray, BackendEntrypoint, + CoderOptions, _normalize_path, datatree_from_dict_with_io_cleanup, robust_getitem, @@ -216,13 +216,6 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group=None, application=None, session=None, @@ -230,6 +223,8 @@ def open_dataset( timeout=None, verify=None, user_charset=None, + coder_options: Optional[CoderOptions] = None, + **kwargs, ) -> Dataset: store = PydapDataStore.open( url=filename_or_obj, @@ -245,13 +240,7 @@ def open_dataset( with close_on_error(store): ds = store_entrypoint.open_dataset( store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) return ds @@ -259,35 +248,24 @@ def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group: str | None = None, application=None, session=None, timeout=None, verify=None, user_charset=None, + coder_options: Optional[CoderOptions] = None, + **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( - filename_or_obj, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, group=group, application=None, session=None, timeout=None, verify=None, user_charset=None, + coder_options=coder_options, + **kwargs, ) return datatree_from_dict_with_io_cleanup(groups_dict) @@ -296,19 +274,14 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group: str | None = None, application=None, session=None, timeout=None, verify=None, user_charset=None, + coder_options: Optional[CoderOptions] = None, + **kwargs, ) -> dict[str, Dataset]: from xarray.core.treenode import NodePath @@ -374,13 
+347,7 @@ def group_fqn(store, path=None, g_fqn=None) -> dict[str, str]: with close_on_error(store): group_ds = store_entrypoint.open_dataset( store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) if group: group_name = str(NodePath(path_group).relative_to(parent)) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 93d0e40a6e1..7d81abd7c72 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -3,8 +3,7 @@ import gzip import io import os -from collections.abc import Iterable -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional import numpy as np @@ -12,6 +11,7 @@ BACKEND_ENTRYPOINTS, BackendArray, BackendEntrypoint, + CoderOptions, WritableCFDataStore, _normalize_path, ) @@ -310,35 +310,26 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, - mode="r", format=None, group=None, mmap=None, lock=None, + coder_options: Optional[CoderOptions] = None, + **kwargs, ) -> Dataset: + coder_options = ( + coder_options if coder_options is not None else self.coder_options + ) filename_or_obj = _normalize_path(filename_or_obj) store = ScipyDataStore( - filename_or_obj, mode=mode, format=format, group=group, mmap=mmap, lock=lock + filename_or_obj, format=format, group=group, mmap=mmap, lock=lock ) store_entrypoint = StoreBackendEntrypoint() with close_on_error(store): ds = store_entrypoint.open_dataset( store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) return ds diff --git a/xarray/backends/store.py b/xarray/backends/store.py index b1b3956ca8e..e83239a1e56 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -1,13 +1,13 @@ from __future__ import annotations -from collections.abc import Iterable -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional from xarray import conventions from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractDataStore, BackendEntrypoint, + CoderOptions, ) from xarray.core.dataset import Dataset @@ -31,29 +31,18 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, + coder_options: Optional[CoderOptions] = None, ) -> Dataset: assert isinstance(filename_or_obj, AbstractDataStore) vars, attrs = filename_or_obj.load() encoding = filename_or_obj.get_encoding() + coder_options = ( + coder_options if coder_options is not None else self.coder_options + ) vars, attrs, coord_names = conventions.decode_cf_variables( - vars, - attrs, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + vars, 
attrs, **coder_options.to_kwargs() ) ds = Dataset(vars, attrs=attrs) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index b86b5d0b374..b89b7248051 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -5,7 +5,7 @@ import os import struct from collections.abc import Hashable, Iterable, Mapping -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, Optional, cast import numpy as np import pandas as pd @@ -17,8 +17,10 @@ AbstractWritableDataStore, BackendArray, BackendEntrypoint, + CoderOptions, _encode_variable_name, _normalize_path, + _validate_kwargs_for_dataclass, datatree_from_dict_with_io_cleanup, ensure_dtype_not_object, ) @@ -1330,23 +1332,24 @@ def open_zarr( group=None, synchronizer=None, chunks="auto", - decode_cf=True, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables=None, + # decode_cf=True, + # mask_and_scale=True, + # decode_times=True, + # concat_characters=True, + # decode_coords=True, + # drop_variables=None, consolidated=None, overwrite_encoded_chunks=False, chunk_store=None, storage_options=None, - decode_timedelta=None, - use_cftime=None, + # decode_timedelta=None, + # use_cftime=None, zarr_version=None, zarr_format=None, use_zarr_fill_value_as_mask=None, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -1379,32 +1382,91 @@ def open_zarr( overwrite_encoded_chunks : bool, optional Whether to drop the zarr chunks encoded for each variable when a dataset is loaded with specified chunk sizes (default: False) - decode_cf : bool, optional - Whether to decode these variables, assuming they were saved according - to CF conventions. - mask_and_scale : bool, optional - If True, replace array values equal to `_FillValue` with NA and scale - values according to the formula `original_values * scale_factor + - add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are - taken from variable attributes (if they exist). If the `_FillValue` or - `missing_value` attribute contains multiple values a warning will be - issued and all array values matching one of the multiple values will - be replaced by NA. - decode_times : bool, optional - If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, leave them encoded as numbers. - concat_characters : bool, optional - If True, concatenate along the last dimension of character arrays to - form string arrays. Dimensions will only be concatenated over (and - removed) if they have no corresponding variable and if they are only - used as the last dimension of character arrays. - decode_coords : bool, optional - If True, decode the 'coordinates' attribute to identify coordinates in - the resulting dataset. - drop_variables : str or iterable, optional - A variable or list of variables to exclude from being parsed from the - dataset. This may be useful to drop variables with problems or - inconsistent values. + coder_options : bool or CoderOptions, optional + Dataclass containing below keyword arguments to pass to cf decoding. If set, + overrides any given keyword arguments: + + - 'decode_cf' : bool, optional + Whether to decode these variables, assuming they were saved according + to CF conventions. 
+ + - 'mask_and_scale' : bool or dict-like, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_times' : bool, CFDatetimeCoder or dict-like, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them + encoded as numbers. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_timedelta' : bool, CFTimedeltaCoder, or dict-like, optional + If True, decode variables and coordinates with time units in + {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of ``decode_times``; if + ``decode_times`` is a :py:class:`coders.CFDatetimeCoder` instance, this + takes the form of a :py:class:`coders.CFTimedeltaCoder` instance with a + matching ``time_unit``. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'use_cftime' : bool or dict-like, optional + Only relevant if encoded dates come from a standard calendar + (e.g. "gregorian", "proleptic_gregorian", "standard", or not + specified). If None (default), attempt to decode times to + ``np.datetime64[ns]`` objects; if this is not possible, decode times to + ``cftime.datetime`` objects. If True, always decode times to + ``cftime.datetime`` objects, regardless of whether or not they can be + represented using ``np.datetime64[ns]`` objects. If False, always + decode times to ``np.datetime64[ns]`` objects; if this is not possible + raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. + + - 'concat_characters' : bool or dict-like, optional + If True, concatenate along the last dimension of character arrays to + form string arrays. Dimensions will only be concatenated over (and + removed) if they have no corresponding variable and if they are only + used as the last dimension of character arrays. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. + This keyword may not be supported by all the backends. + + - 'decode_coords' : bool or {"coordinates", "all"}, optional + Controls which variables are set as coordinate variables: + + - "coordinates" or True: Set variables referred to in the + ``'coordinates'`` attribute of the datasets or individual variables + as coordinate variables. + - "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and + other attributes as coordinate variables. 
+ + Only existing variables can be set as coordinates. Missing variables + will be silently ignored. + + - 'drop_variables' : str or iterable of str, optional + A variable or list of variables to exclude from being parsed from the + dataset. This may be useful to drop variables with problems or + inconsistent values. + + .. versionadded:: 2025.06.2 + The new keyword argument 'coder_options' was added. For backwards + compatibility coder_options can be given as keyword arguments, too. + consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. @@ -1418,21 +1480,6 @@ def open_zarr( storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). - decode_timedelta : bool, optional - If True, decode variables and coordinates with time units in - {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} - into timedelta objects. If False, leave them encoded as numbers. - If None (default), assume the same value of decode_time. - use_cftime : bool, optional - Only relevant if encoded dates come from a standard calendar - (e.g. "gregorian", "proleptic_gregorian", "standard", or not - specified). If None (default), attempt to decode times to - ``np.datetime64[ns]`` objects; if this is not possible, decode times to - ``cftime.datetime`` objects. If True, always decode times to - ``cftime.datetime`` objects, regardless of whether or not they can be - represented using ``np.datetime64[ns]`` objects. If False, always - decode times to ``np.datetime64[ns]`` objects; if this is not possible - raise an error. zarr_version : int or None, optional .. deprecated:: 2024.9.1 @@ -1488,9 +1535,12 @@ def open_zarr( chunks = None if kwargs: - raise TypeError( - "open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys()) - ) + invalid_kwargs = _validate_kwargs_for_dataclass(CoderOptions, kwargs) + if invalid_kwargs: + raise TypeError( + "open_zarr() got unexpected keyword arguments " + + ",".join(invalid_kwargs.keys()) + ) backend_kwargs = { "synchronizer": synchronizer, @@ -1505,21 +1555,15 @@ def open_zarr( ds = open_dataset( filename_or_obj=store, group=group, - decode_cf=decode_cf, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, engine="zarr", chunks=chunks, - drop_variables=drop_variables, chunked_array_type=chunked_array_type, from_array_kwargs=from_array_kwargs, backend_kwargs=backend_kwargs, - decode_timedelta=decode_timedelta, - use_cftime=use_cftime, zarr_version=zarr_version, use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask, + coder_options=coder_options, + **kwargs, ) return ds @@ -1553,15 +1597,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group=None, - mode="r", synchronizer=None, consolidated=None, chunk_store=None, @@ -1572,13 +1608,14 @@ def open_dataset( engine=None, use_zarr_fill_value_as_mask=None, cache_members: bool = True, + coder_options: Optional[CoderOptions] = None, ) -> Dataset: + # coder_options = coder_options if coder_options is not None else self.coder_options filename_or_obj = _normalize_path(filename_or_obj) if not store: store = ZarrStore.open_group( filename_or_obj, 
group=group, - mode=mode, synchronizer=synchronizer, consolidated=consolidated, consolidate_on_close=False, @@ -1594,13 +1631,7 @@ def open_dataset( with close_on_error(store): ds = store_entrypoint.open_dataset( store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) return ds @@ -1608,40 +1639,26 @@ def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group: str | None = None, - mode="r", synchronizer=None, consolidated=None, chunk_store=None, storage_options=None, zarr_version=None, zarr_format=None, + coder_options: Optional[CoderOptions] = None, ) -> DataTree: filename_or_obj = _normalize_path(filename_or_obj) groups_dict = self.open_groups_as_dict( filename_or_obj=filename_or_obj, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, group=group, - mode=mode, synchronizer=synchronizer, consolidated=consolidated, chunk_store=chunk_store, storage_options=storage_options, zarr_version=zarr_version, zarr_format=zarr_format, + coder_options=coder_options, ) return datatree_from_dict_with_io_cleanup(groups_dict) @@ -1650,21 +1667,14 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, group: str | None = None, - mode="r", synchronizer=None, consolidated=None, chunk_store=None, storage_options=None, zarr_version=None, zarr_format=None, + coder_options: Optional[CoderOptions] = None, ) -> dict[str, Dataset]: filename_or_obj = _normalize_path(filename_or_obj) @@ -1677,7 +1687,6 @@ def open_groups_as_dict( stores = ZarrStore.open_store( filename_or_obj, group=parent, - mode=mode, synchronizer=synchronizer, consolidated=consolidated, consolidate_on_close=False, @@ -1694,13 +1703,7 @@ def open_groups_as_dict( with close_on_error(store): group_ds = store_entrypoint.open_dataset( store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, + coder_options=coder_options, ) if group: group_name = str(NodePath(path_group).relative_to(parent)) diff --git a/xarray/conventions.py b/xarray/conventions.py index c9cd2a5dcdc..a4f42bd7042 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -382,7 +382,7 @@ def stackable(dim: Hashable) -> bool: if isinstance(drop_variables, str): drop_variables = [drop_variables] - elif drop_variables is None: + elif drop_variables is None or drop_variables is False: drop_variables = [] drop_variables = set(drop_variables) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 68ff9233080..7e8e8f9bf19 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2333,7 +2333,7 @@ def save(self, 
dataset, store_target, **kwargs): # type: ignore[override] @contextlib.contextmanager def open(self, path, **kwargs): with xr.open_dataset( - path, engine="zarr", mode="r", **kwargs, **self.version_kwargs + path, engine="zarr", **kwargs, **self.version_kwargs ) as ds: yield ds @@ -4356,14 +4356,13 @@ def test_phony_dims_warning(self) -> None: fx = f.create_group(grp) for k, v in var.items(): fx.create_dataset(k, data=v) - with pytest.warns(UserWarning, match="The 'phony_dims' kwarg"): - with xr.open_dataset(tmp_file, engine="h5netcdf", group="bar") as ds: - assert ds.sizes == { - "phony_dim_0": 5, - "phony_dim_1": 5, - "phony_dim_2": 5, - "phony_dim_3": 25, - } + with xr.open_dataset(tmp_file, engine="h5netcdf", group="bar") as ds: + assert ds.sizes == { + "phony_dim_0": 5, + "phony_dim_1": 5, + "phony_dim_2": 5, + "phony_dim_3": 25, + } @requires_h5netcdf diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index 9342423b727..f8d7b6b7a71 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -69,7 +69,7 @@ def open_dataset( class PassThroughBackendEntrypoint(xr.backends.BackendEntrypoint): """Access an object passed to the `open_dataset` method.""" - def open_dataset(self, dataset, *, drop_variables=None): + def open_dataset(self, dataset, *, coder_options=None): """Return the first argument.""" return dataset diff --git a/xarray/tests/test_backends_datatree.py b/xarray/tests/test_backends_datatree.py index 6b3674e1a8c..c7f329104f5 100644 --- a/xarray/tests/test_backends_datatree.py +++ b/xarray/tests/test_backends_datatree.py @@ -530,14 +530,13 @@ def test_phony_dims_warning(self, tmpdir) -> None: for k, v in var.items(): fx.create_dataset(k, data=v) - with pytest.warns(UserWarning, match="The 'phony_dims' kwarg"): - with open_datatree(filepath, engine=self.engine) as tree: - assert tree.bar.dims == { - "phony_dim_0": 5, - "phony_dim_1": 5, - "phony_dim_2": 5, - "phony_dim_3": 25, - } + with open_datatree(filepath, engine=self.engine) as tree: + assert tree.bar.dims == { + "phony_dim_0": 5, + "phony_dim_1": 5, + "phony_dim_2": 5, + "phony_dim_3": 25, + } @requires_zarr diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index b4817d7442f..cc444304091 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -126,9 +126,9 @@ def test_set_missing_parameters() -> None: def test_set_missing_parameters_raise_error() -> None: - backend = DummyBackendEntrypointKwargs - with pytest.raises(TypeError): - plugins.set_missing_parameters({"engine": backend}) + # backend = DummyBackendEntrypointKwargs + # with pytest.raises(TypeError): + # plugins.set_missing_parameters({"engine": backend}) backend_args = DummyBackendEntrypointArgs with pytest.raises(TypeError): From da552d4fe7656d173e5c187a0a4550c4f422b1c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 11:53:12 +0200 Subject: [PATCH 02/15] array-api-strict hotfix --- ci/requirements/all-but-dask.yml | 2 +- ci/requirements/all-but-numba.yml | 2 +- ci/requirements/environment-3.14.yml | 2 +- ci/requirements/environment-windows-3.14.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index ca4943bddb1..5f5db4a0f18 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -4,7 +4,7 @@ channels: - nodefaults 
dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/all-but-numba.yml b/ci/requirements/all-but-numba.yml index fa7ad81f198..7c492aec704 100644 --- a/ci/requirements/all-but-numba.yml +++ b/ci/requirements/all-but-numba.yml @@ -6,7 +6,7 @@ dependencies: # Pin a "very new numpy" (updated Sept 24, 2024) - numpy>=2.1.1 - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment-3.14.yml b/ci/requirements/environment-3.14.yml index 1e6ee7ff5f9..06c4df82663 100644 --- a/ci/requirements/environment-3.14.yml +++ b/ci/requirements/environment-3.14.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment-windows-3.14.yml b/ci/requirements/environment-windows-3.14.yml index 4eb2049f2e6..dd48add6b73 100644 --- a/ci/requirements/environment-windows-3.14.yml +++ b/ci/requirements/environment-windows-3.14.yml @@ -2,7 +2,7 @@ name: xarray-tests channels: - conda-forge dependencies: - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 45cbebd38db..3213ef687d3 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -2,7 +2,7 @@ name: xarray-tests channels: - conda-forge dependencies: - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index a9499694e15..fc54b6600fe 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy From 65045fd89f499c53c28cd6c4f0241d42aa43d053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 12:17:42 +0200 Subject: [PATCH 03/15] try to get typing correct --- xarray/backends/common.py | 2 +- xarray/backends/h5netcdf_.py | 8 ++++---- xarray/backends/netCDF4_.py | 8 ++++---- xarray/backends/pydap_.py | 4 ++-- xarray/backends/scipy_.py | 4 ++-- xarray/backends/store.py | 4 ++-- xarray/backends/zarr.py | 10 +++++----- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index b7d50def671..446b7cd9e5f 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -830,7 +830,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Union[bool, CoderOptions, None] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> Dataset: """ Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. 
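
The ``Union[bool, CoderOptions]`` annotation mirrors the shorthand handled in
``_resolve_decoders_options`` at this point in the series (the bool overload
is removed again in a later patch). A sketch of the intended semantics, with
the file name illustrative:

    import xarray as xr

    # True  -> use the backend's default CoderOptions instance
    ds = xr.open_dataset("data.nc", coder_options=True)
    # False -> reset every coder field to False, i.e. plain decode_cf=False
    ds = xr.open_dataset("data.nc", coder_options=False)
    # None  -> any individually passed decoder kwargs are collected into the
    #          backend's coder_class (the backwards-compatible path)
    ds = xr.open_dataset("data.nc", decode_coords="all")
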
diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 19197d4aad5..fc443a6ab1d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,7 +3,7 @@ import functools import io import os -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union import numpy as np @@ -428,7 +428,7 @@ def open_dataset( driver=None, driver_kwds=None, storage_options: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> Dataset: coder_options = ( @@ -469,7 +469,7 @@ def open_datatree( decode_vlen_strings=True, driver=None, driver_kwds=None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -500,7 +500,7 @@ def open_groups_as_dict( decode_vlen_strings=True, driver=None, driver_kwds=None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index c9070d20e43..d55ab3e20ed 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -6,7 +6,7 @@ from collections.abc import Mapping from contextlib import suppress from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal, Optional +from typing import TYPE_CHECKING, Any, Literal, Optional, Union import numpy as np @@ -669,7 +669,7 @@ def open_dataset( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> Dataset: coder_options = ( coder_options if coder_options is not None else self.coder_options @@ -707,7 +707,7 @@ def open_datatree( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -737,7 +737,7 @@ def open_groups_as_dict( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index f54a07d9a9d..2e1b4624ea7 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union import numpy as np @@ -280,7 +280,7 @@ def open_groups_as_dict( timeout=None, verify=None, user_charset=None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> dict[str, Dataset]: from xarray.core.treenode import NodePath diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 7d81abd7c72..061efea0631 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -3,7 +3,7 @@ import gzip import io import os -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union import numpy as np @@ -314,7 +314,7 @@ def open_dataset( group=None, mmap=None, lock=None, - coder_options: Optional[CoderOptions] = None, + 
coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> Dataset: coder_options = ( diff --git a/xarray/backends/store.py b/xarray/backends/store.py index e83239a1e56..e2a3a4ea7e5 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union from xarray import conventions from xarray.backends.common import ( @@ -31,7 +31,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> Dataset: assert isinstance(filename_or_obj, AbstractDataStore) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index b89b7248051..9de3a75541e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -5,7 +5,7 @@ import os import struct from collections.abc import Hashable, Iterable, Mapping -from typing import TYPE_CHECKING, Any, Literal, Optional, cast +from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast import numpy as np import pandas as pd @@ -1349,7 +1349,7 @@ def open_zarr( use_zarr_fill_value_as_mask=None, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -1608,7 +1608,7 @@ def open_dataset( engine=None, use_zarr_fill_value_as_mask=None, cache_members: bool = True, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> Dataset: # coder_options = coder_options if coder_options is not None else self.coder_options filename_or_obj = _normalize_path(filename_or_obj) @@ -1646,7 +1646,7 @@ def open_datatree( storage_options=None, zarr_version=None, zarr_format=None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> DataTree: filename_or_obj = _normalize_path(filename_or_obj) groups_dict = self.open_groups_as_dict( @@ -1674,7 +1674,7 @@ def open_groups_as_dict( storage_options=None, zarr_version=None, zarr_format=None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> dict[str, Dataset]: filename_or_obj = _normalize_path(filename_or_obj) From 90c45be7a76ffe34670f6ff87ab6c2ced464e0c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 13:44:21 +0200 Subject: [PATCH 04/15] handle deprecated downstream packages faithfully --- xarray/backends/api.py | 42 +++++++++++++++++++++++++++++++-------- xarray/backends/common.py | 7 ++++++- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c6d3dba7262..515e8c1b766 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -30,6 +30,7 @@ from xarray.backends.common import ( AbstractDataStore, ArrayWriter, + BaseCoderOptions, CoderOptions, _find_absolute_paths, _normalize_path, @@ -43,7 +44,7 @@ from xarray.core.indexes import Index from xarray.core.treenode import group_subtrees from xarray.core.types import NetcdfWriteModes, ZarrWriteModes -from xarray.core.utils import is_remote_uri +from xarray.core.utils import emit_user_level_warning, is_remote_uri from xarray.namedarray.daskmanager import 
DaskManager from xarray.namedarray.parallelcompat import guess_chunkmanager from xarray.structure.chunks import _get_chunk, _maybe_chunk @@ -487,22 +488,39 @@ def _datatree_from_backend_datatree( def _resolve_decoders_options(coder_options, backend, decoders): # initialize CoderOptions with decoders if not given # Deprecation Fallback + deprecated = False if coder_options is False: coder_options = _reset_dataclass_to_false(backend.coder_options) elif coder_options is True: coder_options = backend.coder_options elif coder_options is None: decode_cf = decoders.pop("decode_cf", None) + + # deprecation fallback + _coder_options = backend.coder_options + if type(_coder_options) is BaseCoderOptions: + coder_options = CoderOptions() + coder_class = CoderOptions + emit_user_level_warning( + "'coder_options' keyword argument introduced, grouping together " + f"all decoder keyword arguments. Please update {backend} accordingly.", + FutureWarning, + ) + deprecated = True + else: + coder_options = backend.coder_options + coder_class = backend.coder_class if decode_cf is False: - coder_options = _reset_dataclass_to_false(backend.coder_options) + coder_options = _reset_dataclass_to_false(coder_options) else: - field_names = {f.name for f in fields(backend.coder_class)} + field_names = {f.name for f in fields(coder_class)} coders = {} for d in list(decoders): if d in field_names: coders[d] = decoders.pop(d) - coder_options = backend.coder_class(**coders) - return coder_options + coder_options = coder_class(**coders) + + return coder_options, deprecated def open_dataset( @@ -707,13 +725,21 @@ def open_dataset( backend = plugins.get_backend(engine) # initialize coder_options per kwargs if not given - coder_options = _resolve_decoders_options(coder_options, backend, kwargs) + coder_options, deprecated = _resolve_decoders_options( + coder_options, backend, kwargs + ) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) + + # deprecation fallback + nkwargs = kwargs.copy() + if deprecated: + nkwargs.update(coder_options.to_kwargs()) + else: + nkwargs.update(coder_options=coder_options) backend_ds = backend.open_dataset( filename_or_obj, - coder_options=coder_options, - **kwargs, + **nkwargs, ) ds = _dataset_from_backend_dataset( backend_ds, diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 446b7cd9e5f..2281a5078fc 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -671,7 +671,12 @@ def _validate_kwargs_for_dataclass(cls, kwargs): @dataclass(frozen=True, kw_only=True) -class CoderOptions: +class BaseCoderOptions: + pass + + +@dataclass(frozen=True, kw_only=True) +class CoderOptions(BaseCoderOptions): """ CF Coding Options. 
From 9d9d2bf371fa7eaa0476ae4ed181d9a60e3eaf97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 14:00:35 +0200 Subject: [PATCH 05/15] fix deprecation --- xarray/backends/api.py | 29 +++++++++++++++++++++++------ xarray/backends/common.py | 2 +- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 515e8c1b766..d21f6d01767 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -734,7 +734,7 @@ def open_dataset( # deprecation fallback nkwargs = kwargs.copy() if deprecated: - nkwargs.update(coder_options.to_kwargs()) + nkwargs.update(**coder_options.to_kwargs()) else: nkwargs.update(coder_options=coder_options) backend_ds = backend.open_dataset( @@ -1176,13 +1176,21 @@ def open_datatree( backend = plugins.get_backend(engine) - coder_options = _resolve_decoders_options(coder_options, backend, kwargs) + coder_options, deprecated = _resolve_decoders_options( + coder_options, backend, kwargs + ) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) + # deprecation fallback + nkwargs = kwargs.copy() + if deprecated: + nkwargs.update(**coder_options.to_kwargs()) + else: + nkwargs.update(coder_options=coder_options) + backend_tree = backend.open_datatree( filename_or_obj, - coder_options=coder_options, - **kwargs, + **nkwargs, ) tree = _datatree_from_backend_datatree( @@ -1401,12 +1409,21 @@ def open_groups( backend = plugins.get_backend(engine) - coder_options = _resolve_decoders_options(coder_options, backend, kwargs) + coder_options, deprecated = _resolve_decoders_options( + coder_options, backend, kwargs + ) overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) + + # deprecation fallback + nkwargs = kwargs.copy() + if deprecated: + nkwargs.update(**coder_options.to_kwargs()) + else: + nkwargs.update(coder_options=coder_options) backend_groups = backend.open_groups_as_dict( filename_or_obj, - coder_options=coder_options, + **nkwargs, ) groups = { diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 2281a5078fc..02381129593 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -812,7 +812,7 @@ class BackendEntrypoint: open_dataset_parameters: ClassVar[tuple | None] = None description: ClassVar[str] = "" url: ClassVar[str] = "" - coder_class = CoderOptions + coder_class = BaseCoderOptions def __init__( self, From dba23bf06f7059936abaa986d937b53417841f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 14:18:25 +0200 Subject: [PATCH 06/15] fix coder_class usage --- xarray/backends/scipy_.py | 2 ++ xarray/backends/store.py | 2 ++ xarray/backends/zarr.py | 1 + xarray/tests/test_backends_api.py | 10 ++++++++-- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 061efea0631..4f823b3b48c 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -286,6 +286,8 @@ class ScipyBackendEntrypoint(BackendEntrypoint): backends.H5netcdfBackendEntrypoint """ + coder_class = CoderOptions + description = "Open netCDF files (.nc, .nc4, .cdf and .gz) using scipy in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ScipyBackendEntrypoint.html" diff --git a/xarray/backends/store.py b/xarray/backends/store.py index e2a3a4ea7e5..b0daa0c9619 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -21,6 +21,8 @@ class StoreBackendEntrypoint(BackendEntrypoint): description = 
"Open AbstractDataStore instances in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.StoreBackendEntrypoint.html" + coder_class = CoderOptions + def guess_can_open( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 9de3a75541e..ea72a1c367b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1580,6 +1580,7 @@ class ZarrBackendEntrypoint(BackendEntrypoint): backends.ZarrStore """ + coder_class = CoderOptions description = "Open zarr files (.zarr) using zarr in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ZarrBackendEntrypoint.html" diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index f8d7b6b7a71..322f0f18599 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -35,10 +35,12 @@ def test_custom_engine() -> None: ) class CustomBackend(xr.backends.BackendEntrypoint): + coder_class = xr.backends.CoderOptions + def open_dataset( self, filename_or_obj, - drop_variables=None, + coder_options=None, **kwargs, ) -> xr.Dataset: return expected.copy(deep=True) @@ -54,10 +56,12 @@ def test_multiindex() -> None: dataset = dataset.stack(z=["coord1", "coord2"]) class MultiindexBackend(xr.backends.BackendEntrypoint): + coder_class = xr.backends.CoderOptions + def open_dataset( self, filename_or_obj, - drop_variables=None, + coder_options=None, **kwargs, ) -> xr.Dataset: return dataset.copy(deep=True) @@ -69,6 +73,8 @@ def open_dataset( class PassThroughBackendEntrypoint(xr.backends.BackendEntrypoint): """Access an object passed to the `open_dataset` method.""" + coder_class = xr.backends.CoderOptions + def open_dataset(self, dataset, *, coder_options=None): """Return the first argument.""" return dataset From caf3ced4549adabda7ec778df8678da68e6f32ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 14:30:04 +0200 Subject: [PATCH 07/15] fix pydap --- xarray/backends/pydap_.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 2e1b4624ea7..86864016048 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -203,6 +203,7 @@ class PydapBackendEntrypoint(BackendEntrypoint): backends.PydapDataStore """ + coder_class = CoderOptions description = "Open remote datasets via OPeNDAP using pydap in Xarray" url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html" @@ -223,7 +224,7 @@ def open_dataset( timeout=None, verify=None, user_charset=None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> Dataset: store = PydapDataStore.open( @@ -254,7 +255,7 @@ def open_datatree( timeout=None, verify=None, user_charset=None, - coder_options: Optional[CoderOptions] = None, + coder_options: Optional[Union[bool, CoderOptions]] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( From dec91b27518f378bd2a3ae0455730c13c0120bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 14:38:23 +0200 Subject: [PATCH 08/15] fix pydap --- xarray/backends/pydap_.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 86864016048..aa1830c536f 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -259,6 +259,7 @@ def open_datatree( 
**kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( + filename_or_obj=filename_or_obj, group=group, application=None, session=None, From 9e9524aa3899cabf5fb78455e4566a3f27498859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 14:54:08 +0200 Subject: [PATCH 09/15] fix coder_class vs coder_options --- xarray/backends/api.py | 8 ++++---- xarray/backends/common.py | 9 --------- xarray/backends/h5netcdf_.py | 2 +- xarray/backends/netCDF4_.py | 2 +- xarray/backends/scipy_.py | 2 +- xarray/backends/store.py | 2 +- xarray/backends/zarr.py | 4 +++- 7 files changed, 11 insertions(+), 18 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index d21f6d01767..cc3152bb36d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -490,14 +490,14 @@ def _resolve_decoders_options(coder_options, backend, decoders): # Deprecation Fallback deprecated = False if coder_options is False: - coder_options = _reset_dataclass_to_false(backend.coder_options) + coder_options = _reset_dataclass_to_false(backend.coder_class()) elif coder_options is True: - coder_options = backend.coder_options + coder_options = backend.coder_class() elif coder_options is None: decode_cf = decoders.pop("decode_cf", None) # deprecation fallback - _coder_options = backend.coder_options + _coder_options = backend.coder_class() if type(_coder_options) is BaseCoderOptions: coder_options = CoderOptions() coder_class = CoderOptions @@ -508,7 +508,7 @@ def _resolve_decoders_options(coder_options, backend, decoders): ) deprecated = True else: - coder_options = backend.coder_options + coder_options = backend.coder_class() coder_class = backend.coder_class if decode_cf is False: coder_options = _reset_dataclass_to_false(coder_options) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 02381129593..2daeeb440c3 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -814,15 +814,6 @@ class BackendEntrypoint: url: ClassVar[str] = "" coder_class = BaseCoderOptions - def __init__( - self, - coder_options: Optional[CoderOptions] = None, - ): - # Instantiate default coder_options - self.coder_options = ( - coder_options if coder_options is not None else self.coder_class() - ) - def __repr__(self) -> str: txt = f"<{type(self).__name__}>" if self.description: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index fc443a6ab1d..2e4aa1239b7 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -432,7 +432,7 @@ def open_dataset( **kwargs, ) -> Dataset: coder_options = ( - coder_options if coder_options is not None else self.coder_options + coder_options if coder_options is not None else self.coder_class() ) filename_or_obj = _normalize_path(filename_or_obj) store = H5NetCDFStore.open( diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index d55ab3e20ed..2c17c7fd8bb 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -672,7 +672,7 @@ def open_dataset( coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> Dataset: coder_options = ( - coder_options if coder_options is not None else self.coder_options + coder_options if coder_options is not None else self.coder_class() ) filename_or_obj = _normalize_path(filename_or_obj) store = NetCDF4DataStore.open( diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 4f823b3b48c..ebc34fed9f1 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -320,7 +320,7 @@ 
def open_dataset( **kwargs, ) -> Dataset: coder_options = ( - coder_options if coder_options is not None else self.coder_options + coder_options if coder_options is not None else self.coder_class() ) filename_or_obj = _normalize_path(filename_or_obj) store = ScipyDataStore( diff --git a/xarray/backends/store.py b/xarray/backends/store.py index b0daa0c9619..3381954275e 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -41,7 +41,7 @@ def open_dataset( encoding = filename_or_obj.get_encoding() coder_options = ( - coder_options if coder_options is not None else self.coder_options + coder_options if coder_options is not None else self.coder_class() ) vars, attrs, coord_names = conventions.decode_cf_variables( vars, attrs, **coder_options.to_kwargs() diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ea72a1c367b..bbe00a7e45c 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1611,7 +1611,9 @@ def open_dataset( cache_members: bool = True, coder_options: Optional[Union[bool, CoderOptions]] = None, ) -> Dataset: - # coder_options = coder_options if coder_options is not None else self.coder_options + coder_options = ( + coder_options if coder_options is not None else self.coder_class() + ) filename_or_obj = _normalize_path(filename_or_obj) if not store: store = ZarrStore.open_group( From fb77adf3abffcc4373c80dd42a1aaffb8040314b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 15:14:13 +0200 Subject: [PATCH 10/15] remove overloading of coder_options --- xarray/backends/api.py | 15 ++++++--------- xarray/backends/common.py | 6 +++--- xarray/backends/h5netcdf_.py | 8 ++++---- xarray/backends/netCDF4_.py | 8 ++++---- xarray/backends/pydap_.py | 8 ++++---- xarray/backends/scipy_.py | 4 ++-- xarray/backends/store.py | 4 ++-- xarray/backends/zarr.py | 10 +++++----- 8 files changed, 30 insertions(+), 33 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index cc3152bb36d..f68da177f59 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -18,6 +18,7 @@ Any, Final, Literal, + Optional, Union, cast, overload, @@ -489,11 +490,7 @@ def _resolve_decoders_options(coder_options, backend, decoders): # initialize CoderOptions with decoders if not given # Deprecation Fallback deprecated = False - if coder_options is False: - coder_options = _reset_dataclass_to_false(backend.coder_class()) - elif coder_options is True: - coder_options = backend.coder_class() - elif coder_options is None: + if coder_options is None: decode_cf = decoders.pop("decode_cf", None) # deprecation fallback @@ -533,7 +530,7 @@ def open_dataset( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Union[bool, CoderOptions, None] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> Dataset: """Open and decode a dataset from a file or file-like object. 
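Editor's aside on the fallback wired into the three ``open_*`` call sites patched above: a minimal standalone sketch of the dispatch. ``CoderOptionsSketch`` and ``build_backend_kwargs`` are stand-in names invented here for illustration, not xarray identifiers.

.. code-block:: python

    from __future__ import annotations

    from dataclasses import dataclass


    @dataclass
    class CoderOptionsSketch:
        decode_times: bool | None = None
        mask_and_scale: bool | None = None

        def to_kwargs(self):
            # Forward only the fields the caller actually set.
            return {k: v for k, v in vars(self).items() if v is not None}


    def build_backend_kwargs(kwargs, coder_options, deprecated):
        nkwargs = kwargs.copy()
        if deprecated:
            # Legacy backend: expand the dataclass back into plain decoder kwargs.
            nkwargs.update(coder_options.to_kwargs())
        else:
            # New-style backend: hand the dataclass over unchanged.
            nkwargs["coder_options"] = coder_options
        return nkwargs


    print(build_backend_kwargs({}, CoderOptionsSketch(decode_times=False), True))
    # -> {'decode_times': False}
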
@@ -767,7 +764,7 @@ def open_dataarray( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Union[bool, CoderOptions, None] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> DataArray: """Open an DataArray from a file or file-like object containing a single @@ -991,7 +988,7 @@ def open_datatree( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Union[bool, CoderOptions, None] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> DataTree: """ @@ -1220,7 +1217,7 @@ def open_groups( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Union[bool, CoderOptions, None] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> dict[str, Dataset]: """ diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 2daeeb440c3..aec69a1102d 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -826,7 +826,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, ) -> Dataset: """ Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. @@ -848,7 +848,7 @@ def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Union[bool, CoderOptions, None] = None, + coder_options: Optional[CoderOptions] = None, ) -> DataTree: """ Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`. @@ -860,7 +860,7 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Union[bool, CoderOptions, None] = None, + coder_options: Optional[CoderOptions] = None, ) -> dict[str, Dataset]: """ Opens a dictionary mapping from group names to Datasets. 
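With the boolean overloading removed, ``coder_options`` is either ``None`` or a ``CoderOptions`` instance. A hedged usage sketch of the two spellings this series supports; ``data.nc`` is a placeholder path, and the ``coder_options`` keyword exists only once this series lands:

.. code-block:: python

    import xarray as xr
    from xarray.backends import CoderOptions

    # New spelling: group the decoders into one dataclass.
    opts = CoderOptions(decode_times=False, drop_variables=["qc_flags"])
    ds = xr.open_dataset("data.nc", coder_options=opts)

    # Deprecated spelling, routed through the fallback shown above.
    ds = xr.open_dataset("data.nc", decode_times=False, drop_variables=["qc_flags"])
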
diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 2e4aa1239b7..9b4ebbe4598 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,7 +3,7 @@ import functools import io import os -from typing import TYPE_CHECKING, Any, Optional, Union +from typing import TYPE_CHECKING, Any, Optional import numpy as np @@ -428,7 +428,7 @@ def open_dataset( driver=None, driver_kwds=None, storage_options: dict[str, Any] | None = None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> Dataset: coder_options = ( @@ -469,7 +469,7 @@ def open_datatree( decode_vlen_strings=True, driver=None, driver_kwds=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -500,7 +500,7 @@ def open_groups_as_dict( decode_vlen_strings=True, driver=None, driver_kwds=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 2c17c7fd8bb..bb56c4499a2 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -6,7 +6,7 @@ from collections.abc import Mapping from contextlib import suppress from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal, Optional, Union +from typing import TYPE_CHECKING, Any, Literal, Optional import numpy as np @@ -669,7 +669,7 @@ def open_dataset( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, ) -> Dataset: coder_options = ( coder_options if coder_options is not None else self.coder_class() @@ -707,7 +707,7 @@ def open_datatree( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -737,7 +737,7 @@ def open_groups_as_dict( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index aa1830c536f..01d27f37975 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Union +from typing import TYPE_CHECKING, Any, Optional import numpy as np @@ -224,7 +224,7 @@ def open_dataset( timeout=None, verify=None, user_charset=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> Dataset: store = PydapDataStore.open( @@ -255,7 +255,7 @@ def open_datatree( timeout=None, verify=None, user_charset=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -282,7 +282,7 @@ def open_groups_as_dict( timeout=None, verify=None, user_charset=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> dict[str, Dataset]: 
from xarray.core.treenode import NodePath diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index ebc34fed9f1..b44df005ec4 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -3,7 +3,7 @@ import gzip import io import os -from typing import TYPE_CHECKING, Any, Optional, Union +from typing import TYPE_CHECKING, Any, Optional import numpy as np @@ -316,7 +316,7 @@ def open_dataset( group=None, mmap=None, lock=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ) -> Dataset: coder_options = ( diff --git a/xarray/backends/store.py b/xarray/backends/store.py index 3381954275e..d597fea8394 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Union +from typing import TYPE_CHECKING, Any, Optional from xarray import conventions from xarray.backends.common import ( @@ -33,7 +33,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, ) -> Dataset: assert isinstance(filename_or_obj, AbstractDataStore) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index bbe00a7e45c..e1b1f2d7350 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -5,7 +5,7 @@ import os import struct from collections.abc import Hashable, Iterable, Mapping -from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast +from typing import TYPE_CHECKING, Any, Literal, Optional, cast import numpy as np import pandas as pd @@ -1349,7 +1349,7 @@ def open_zarr( use_zarr_fill_value_as_mask=None, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, **kwargs, ): """Load and decode a dataset from a Zarr store. 
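The same default-resolution idiom recurs across the backend ``open_dataset`` methods in these hunks. A standalone sketch of the pattern; ``SomeBackend`` and the toy option classes are stand-ins, not xarray classes:

.. code-block:: python

    class BaseCoderOptions:
        pass


    class CoderOptions(BaseCoderOptions):
        pass


    class SomeBackend:
        coder_class = CoderOptions  # each entrypoint declares its options class

        def open_dataset(self, filename_or_obj, *, coder_options=None):
            # ``is not None`` keeps an explicitly passed all-default instance;
            # only a missing argument triggers the backend's own defaults.
            coder_options = (
                coder_options if coder_options is not None else self.coder_class()
            )
            return coder_options


    print(type(SomeBackend().open_dataset("unused")).__name__)  # CoderOptions
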
@@ -1609,7 +1609,7 @@ def open_dataset( engine=None, use_zarr_fill_value_as_mask=None, cache_members: bool = True, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, ) -> Dataset: coder_options = ( coder_options if coder_options is not None else self.coder_class() @@ -1649,7 +1649,7 @@ def open_datatree( storage_options=None, zarr_version=None, zarr_format=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, ) -> DataTree: filename_or_obj = _normalize_path(filename_or_obj) groups_dict = self.open_groups_as_dict( @@ -1677,7 +1677,7 @@ def open_groups_as_dict( storage_options=None, zarr_version=None, zarr_format=None, - coder_options: Optional[Union[bool, CoderOptions]] = None, + coder_options: Optional[CoderOptions] = None, ) -> dict[str, Dataset]: filename_or_obj = _normalize_path(filename_or_obj) From 394038af76bbd8ea38a29e033a5184b63ba63a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 15:23:51 +0200 Subject: [PATCH 11/15] fix typing --- xarray/backends/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f68da177f59..1093e0ea17f 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -486,7 +486,9 @@ def _datatree_from_backend_datatree( return tree -def _resolve_decoders_options(coder_options, backend, decoders): +def _resolve_decoders_options( + coder_options, backend, decoders +) -> tuple[CoderOptions, bool]: # initialize CoderOptions with decoders if not given # Deprecation Fallback deprecated = False From 7fc29e27489965964fd7a553dbc1e9c168fb8ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 15:52:57 +0200 Subject: [PATCH 12/15] try to fix docs --- doc/user-guide/io.rst | 1 + xarray/backends/api.py | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 4f1748ce3c2..0bbf56a242b 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -1127,6 +1127,7 @@ If the file were instead stored remotely (e.g. ``s3://saved_on_disk.h5``) you ca that are used to `configure fsspec `_: .. jupyter-execute:: + :stderr: ds_kerchunked = xr.open_dataset( "./combined.json", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 1093e0ea17f..194b4637a78 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1108,19 +1108,18 @@ def open_datatree( inconsistent values. .. versionadded:: 2025.06.2 - The new keyword argument 'coder_options' was added. For backwards + The new keyword argument ``coder_options`` was added. For backwards compatibility coder_options can be given as keyword arguments, too. - inline_array: bool, default: False How to include the array in the dask task graph. - By default(``inline_array=False``) the array is included in a task by + By default (``inline_array=False``) the array is included in a task by itself, and each chunk refers to that task by its key. With ``inline_array=True``, Dask will instead inline the array directly in the values of the task graph. See :py:func:`dask.array.from_array`. chunked_array_type: str, optional Which chunked array type to coerce this datasets' arrays to. - Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEnetryPoint` system. + Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntryPoint` system. 
Experimental API that should not be relied upon. from_array_kwargs: dict Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create From f1afa23e3d640bc112440ee5e7af0dd23679d44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 18 Jun 2025 16:04:32 +0200 Subject: [PATCH 13/15] fix docstrings --- xarray/backends/api.py | 8 ++++---- xarray/backends/zarr.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 194b4637a78..b78c047d062 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -572,7 +572,7 @@ def open_dataset( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - coder_options : bool or CoderOptions, optional + coder_options : CoderOptions, optional Dataclass containing below keyword arguments to pass to cf decoding. If set, overrides any given keyword arguments: @@ -809,7 +809,7 @@ def open_dataarray( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - coder_options : bool or CoderOptions, optional + coder_options : CoderOptions, optional Dataclass containing below keyword arguments to pass to cf decoding. If set, overrides any given keyword arguments: @@ -1026,7 +1026,7 @@ def open_datatree( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - coder_options : bool or CoderOptions, optional + coder_options : CoderOptions, optional Dataclass containing below keyword arguments to pass to cf decoding. If set, overrides any given keyword arguments: @@ -1258,7 +1258,7 @@ def open_groups( argument to use dask, in which case it defaults to False. Does not change the behavior of coordinates corresponding to dimensions, which always load their data from disk into a ``pandas.Index``. - coder_options : bool or CoderOptions, optional + coder_options : CoderOptions, optional Dataclass containing below keyword arguments to pass to cf decoding. If set, overrides any given keyword arguments: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e1b1f2d7350..7aec27bd0df 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1382,7 +1382,7 @@ def open_zarr( overwrite_encoded_chunks : bool, optional Whether to drop the zarr chunks encoded for each variable when a dataset is loaded with specified chunk sizes (default: False) - coder_options : bool or CoderOptions, optional + coder_options : CoderOptions, optional Dataclass containing below keyword arguments to pass to cf decoding. 
If set, overrides any given keyword arguments: From 47df8802435ea94de519790cf6863da6c8f288c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Jun 2025 06:09:47 +0000 Subject: [PATCH 14/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 9 ++++----- xarray/backends/common.py | 25 ++++++++++--------------- xarray/backends/h5netcdf_.py | 8 ++++---- xarray/backends/netCDF4_.py | 20 +++++++++----------- xarray/backends/pydap_.py | 8 ++++---- xarray/backends/scipy_.py | 4 ++-- xarray/backends/store.py | 4 ++-- xarray/backends/zarr.py | 10 +++++----- 8 files changed, 40 insertions(+), 48 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index e9450e32cb3..e18bc86415d 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -19,7 +19,6 @@ Any, Final, Literal, - Optional, Union, cast, overload, @@ -533,7 +532,7 @@ def open_dataset( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> Dataset: """Open and decode a dataset from a file or file-like object. @@ -767,7 +766,7 @@ def open_dataarray( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> DataArray: """Open an DataArray from a file or file-like object containing a single @@ -991,7 +990,7 @@ def open_datatree( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> DataTree: """ @@ -1219,7 +1218,7 @@ def open_groups( chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, backend_kwargs: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> dict[str, Dataset]: """ diff --git a/xarray/backends/common.py b/xarray/backends/common.py index b72251f24b6..5bdc50a8be1 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -12,7 +12,6 @@ Any, ClassVar, Literal, - Optional, TypeVar, Union, overload, @@ -759,17 +758,13 @@ class CoderOptions(BaseCoderOptions): # Todo: maybe add these two to disentangle masking from scaling? 
# mask: Optional[bool] = None # scale: Optional[bool] = None - mask_and_scale: Optional[bool | Mapping[str, bool]] = None - decode_times: Optional[ - bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] - ] = None - decode_timedelta: Optional[ - bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] - ] = None - use_cftime: Optional[bool | Mapping[str, bool]] = None - concat_characters: Optional[bool | Mapping[str, bool]] = None - decode_coords: Optional[Literal["coordinates", "all"] | bool] = None - drop_variables: Optional[str | Iterable[str]] = None + mask_and_scale: bool | Mapping[str, bool] | None = None + decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None = None + decode_timedelta: bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None = None + use_cftime: bool | Mapping[str, bool] | None = None + concat_characters: bool | Mapping[str, bool] | None = None + decode_coords: Literal["coordinates", "all"] | bool | None = None + drop_variables: str | Iterable[str] | None = None def to_kwargs(self): return {k: v for k, v in vars(self).items() if v is not None} @@ -826,7 +821,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> Dataset: """ Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. @@ -848,7 +843,7 @@ def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> DataTree: """ Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`. @@ -860,7 +855,7 @@ def open_groups_as_dict( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> dict[str, Dataset]: """ Opens a dictionary mapping from group names to Datasets. 
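The reflowed union annotations above leave the ``to_kwargs`` contract unchanged. A self-contained check of that contract, using a toy subset of the fields mirroring the dataclass in the hunk:

.. code-block:: python

    from __future__ import annotations

    from dataclasses import dataclass


    @dataclass(frozen=True)
    class CoderOptions:
        mask_and_scale: bool | None = None
        decode_times: bool | None = None
        decode_coords: str | bool | None = None

        def to_kwargs(self):
            # Fields left at None mean "not set by the user" and are dropped.
            return {k: v for k, v in vars(self).items() if v is not None}


    assert CoderOptions().to_kwargs() == {}
    assert CoderOptions(decode_times=False).to_kwargs() == {"decode_times": False}
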
diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index a4247019f07..86e63e60eb7 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,7 +3,7 @@ import functools import io import os -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any import numpy as np @@ -428,7 +428,7 @@ def open_dataset( driver=None, driver_kwds=None, storage_options: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> Dataset: coder_options = ( @@ -469,7 +469,7 @@ def open_datatree( decode_vlen_strings=True, driver=None, driver_kwds=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -500,7 +500,7 @@ def open_groups_as_dict( decode_vlen_strings=True, driver=None, driver_kwds=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> dict[str, Dataset]: from xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index c3606c1c415..4f9d732522b 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -6,7 +6,7 @@ from collections.abc import Mapping from contextlib import suppress from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal, Optional +from typing import TYPE_CHECKING, Any, Literal import numpy as np @@ -31,13 +31,13 @@ ) from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable from xarray.backends.store import StoreBackendEntrypoint -from xarray.coding.times import CFDatetimeCoder from xarray.coding.strings import ( CharacterArrayCoder, EncodedStringCoder, create_vlen_dtype, is_unicode_dtype, ) +from xarray.coding.times import CFDatetimeCoder from xarray.coding.variables import pop_to from xarray.core import indexing from xarray.core.utils import ( @@ -608,12 +608,10 @@ def close(self, **kwargs): @dataclass(frozen=True) class NetCDF4CoderOptions(CoderOptions): # defaults for netcdf4 based backends - mask_and_scale: Optional[bool | Mapping[str, bool]] = True - decode_times: Optional[ - bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] - ] = True - concat_characters: Optional[bool | Mapping[str, bool]] = True - decode_coords: Optional[Literal["coordinates", "all"] | bool] = True + mask_and_scale: bool | Mapping[str, bool] | None = True + decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None = True + concat_characters: bool | Mapping[str, bool] | None = True + decode_coords: Literal["coordinates", "all"] | bool | None = True class NetCDF4BackendEntrypoint(BackendEntrypoint): @@ -674,7 +672,7 @@ def open_dataset( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> Dataset: coder_options = ( coder_options if coder_options is not None else self.coder_class() @@ -712,7 +710,7 @@ def open_datatree( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -742,7 +740,7 @@ def open_groups_as_dict( auto_complex=None, lock=None, autoclose=False, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> dict[str, Dataset]: from 
xarray.backends.common import _iter_nc_groups diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 01d27f37975..14aa2d3b9ce 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any import numpy as np @@ -224,7 +224,7 @@ def open_dataset( timeout=None, verify=None, user_charset=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> Dataset: store = PydapDataStore.open( @@ -255,7 +255,7 @@ def open_datatree( timeout=None, verify=None, user_charset=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> DataTree: groups_dict = self.open_groups_as_dict( @@ -282,7 +282,7 @@ def open_groups_as_dict( timeout=None, verify=None, user_charset=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> dict[str, Dataset]: from xarray.core.treenode import NodePath diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index ece9a98d09d..4c31c322d38 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -3,7 +3,7 @@ import gzip import io import os -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any import numpy as np @@ -316,7 +316,7 @@ def open_dataset( group=None, mmap=None, lock=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ) -> Dataset: coder_options = ( diff --git a/xarray/backends/store.py b/xarray/backends/store.py index d597fea8394..e9db592d223 100644 --- a/xarray/backends/store.py +++ b/xarray/backends/store.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any from xarray import conventions from xarray.backends.common import ( @@ -33,7 +33,7 @@ def open_dataset( self, filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore, *, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> Dataset: assert isinstance(filename_or_obj, AbstractDataStore) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aa1321ccea8..59a6a36c3ac 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -5,7 +5,7 @@ import os import struct from collections.abc import Hashable, Iterable, Mapping -from typing import TYPE_CHECKING, Any, Literal, Optional, cast +from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np import pandas as pd @@ -1349,7 +1349,7 @@ def open_zarr( use_zarr_fill_value_as_mask=None, chunked_array_type: str | None = None, from_array_kwargs: dict[str, Any] | None = None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, **kwargs, ): """Load and decode a dataset from a Zarr store. 
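The ``NetCDF4CoderOptions`` hunk above shows how a backend ships different defaults: subclass the frozen dataclass and flip selected fields from ``None`` to ``True``. A standalone sketch of the same pattern with a reduced field set:

.. code-block:: python

    from __future__ import annotations

    from dataclasses import dataclass


    @dataclass(frozen=True)
    class CoderOptions:
        mask_and_scale: bool | None = None
        decode_times: bool | None = None


    @dataclass(frozen=True)
    class NetCDF4CoderOptions(CoderOptions):
        # netCDF4-flavoured defaults: decode CF metadata unless told otherwise.
        mask_and_scale: bool | None = True
        decode_times: bool | None = True


    assert CoderOptions().mask_and_scale is None
    assert NetCDF4CoderOptions().mask_and_scale is True
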
@@ -1609,7 +1609,7 @@ def open_dataset( engine=None, use_zarr_fill_value_as_mask=None, cache_members: bool = True, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> Dataset: coder_options = ( coder_options if coder_options is not None else self.coder_class() @@ -1649,7 +1649,7 @@ def open_datatree( storage_options=None, zarr_version=None, zarr_format=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> DataTree: filename_or_obj = _normalize_path(filename_or_obj) groups_dict = self.open_groups_as_dict( @@ -1677,7 +1677,7 @@ def open_groups_as_dict( storage_options=None, zarr_version=None, zarr_format=None, - coder_options: Optional[CoderOptions] = None, + coder_options: CoderOptions | None = None, ) -> dict[str, Dataset]: filename_or_obj = _normalize_path(filename_or_obj) From 03b8ae6de5e37a2366870f01160e9b3106d11cde Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Jul 2025 09:35:19 +0000 Subject: [PATCH 15/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/common.py | 8 ++++++-- xarray/backends/netCDF4_.py | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 5bdc50a8be1..2a829e2db26 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -759,8 +759,12 @@ class CoderOptions(BaseCoderOptions): # mask: Optional[bool] = None # scale: Optional[bool] = None mask_and_scale: bool | Mapping[str, bool] | None = None - decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None = None - decode_timedelta: bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None = None + decode_times: ( + bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None + ) = None + decode_timedelta: ( + bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None + ) = None use_cftime: bool | Mapping[str, bool] | None = None concat_characters: bool | Mapping[str, bool] | None = None decode_coords: Literal["coordinates", "all"] | bool | None = None diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 4f9d732522b..39d15d542a1 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -609,7 +609,9 @@ def close(self, **kwargs): class NetCDF4CoderOptions(CoderOptions): # defaults for netcdf4 based backends mask_and_scale: bool | Mapping[str, bool] | None = True - decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None = True + decode_times: ( + bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] | None + ) = True concat_characters: bool | Mapping[str, bool] | None = True decode_coords: Literal["coordinates", "all"] | bool | None = True
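Finally, with ``coder_options=False`` gone, "decode nothing" is what the resolver builds internally from the deprecated ``decode_cf=False`` via ``_reset_dataclass_to_false``. A hypothetical standalone re-creation of that helper, assuming only what the api.py hunks above show:

.. code-block:: python

    from __future__ import annotations

    from dataclasses import dataclass, fields, replace


    @dataclass(frozen=True)
    class CoderOptions:
        mask_and_scale: bool | None = None
        decode_times: bool | None = None


    def reset_dataclass_to_false(opts):
        # Stand-in for the private helper named in the api.py hunks above.
        return replace(opts, **{f.name: False for f in fields(opts)})


    print(reset_dataclass_to_false(CoderOptions()))
    # CoderOptions(mask_and_scale=False, decode_times=False)
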