MetadataError from ValueError: Could not convert object to NumPy datetime #201

@TomNicholas

I'm trying to debug @thodson-usgs's example from cubed-dev/cubed#520 (and originally #197).

He is doing a whole serverless reduction over virtual references to multiple files (!!! relevant to #123), but there seem to be some more basic errors to fix first.

Specifically, if I try to use virtualizarr on just one of his files, this happens:

import xarray as xr
from virtualizarr import open_virtual_dataset

vds = open_virtual_dataset(
    's3://wrf-se-ak-ar5/ccsm/rcp85/daily/2060/WRFDS_2060-01-01.nc',
    indexes={},                    # don't build in-memory pandas indexes
    loadable_variables=['Time'],   # load Time into memory rather than virtualizing it
    cftime_variables=['Time'],     # decode Time as CF datetimes
)
vds
vds
<xarray.Dataset> Size: 31MB
Dimensions:        (Time: 1, south_north: 250, west_east: 320,
                    interp_levels: 9, soil_layers_stag: 4)
Coordinates:
    interp_levels  (interp_levels) float32 36B ManifestArray<shape=(9,), dtyp...
    Time           (Time) datetime64[ns] 8B 2060-01-01
Dimensions without coordinates: south_north, west_east, soil_layers_stag
Data variables: (12/39)
    SNOWH          (Time, south_north, west_east) float32 320kB ManifestArray...
    ACSNOW         (Time, south_north, west_east) float32 320kB ManifestArray...
    TSK            (Time, south_north, west_east) float32 320kB ManifestArray...
    XLONG          (south_north, west_east) float32 320kB ManifestArray<shape...
    T              (Time, interp_levels, south_north, west_east) float32 3MB ...
    XLAT           (south_north, west_east) float32 320kB ManifestArray<shape...
    ...             ...
    PSFC           (Time, south_north, west_east) float32 320kB ManifestArray...
    ALBEDO         (Time, south_north, west_east) float32 320kB ManifestArray...
    CLDFRA         (Time, interp_levels, south_north, west_east) float32 3MB ...
    SWDNB          (Time, south_north, west_east) float32 320kB ManifestArray...
    PW             (Time, south_north, west_east) float32 320kB ManifestArray...
    SH2O           (Time, soil_layers_stag, south_north, west_east) float32 1MB ManifestArray<shape=(1, 4, 250, 320), dtype=float32, chunks=(1, 4, 250, 32...
Attributes:
    contact:  rtladerjr@alaska.edu
    data:     Downscaled CCSM4
    date:     Mon Oct 21 11:37:23 AKDT 2019
    format:   version 2
    info:     Alaska CASC
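
That part works. The step that writes the references out isn't shown above, but presumably the virtual dataset was serialized to a kerchunk reference file first, along the lines of this sketch (to_kerchunk is virtualizarr's kerchunk writer; the 'combined.json' filename is an assumption, chosen to match the file opened below):

# Sketch of the omitted serialization step (assumed, not from the original report)
vds.virtualize.to_kerchunk('combined.json', format='json')

Opening those references with the kerchunk backend then fails: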
ds = xr.open_dataset('combined.json', engine="kerchunk")
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/meta.py:127, in Metadata2.decode_array_metadata(cls, s)
    126 dimension_separator = meta.get("dimension_separator", None)
--> 127 fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec)
    128 meta = dict(
    129     zarr_format=meta["zarr_format"],
    130     shape=tuple(meta["shape"]),
   (...)
    136     filters=meta["filters"],
    137 )

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/meta.py:260, in Metadata2.decode_fill_value(cls, v, dtype, object_codec)
    259 else:
--> 260     return np.array(v, dtype=dtype)[()]

ValueError: Could not convert object to NumPy datetime

The above exception was the direct cause of the following exception:

MetadataError                             Traceback (most recent call last)
Cell In[8], line 1
----> 1 ds = xr.open_dataset('combined.json', engine="kerchunk")

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/backends/api.py:571, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    559 decoders = _resolve_decoders_kwargs(
    560     decode_cf,
    561     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    567     decode_coords=decode_coords,
    568 )
    570 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 571 backend_ds = backend.open_dataset(
    572     filename_or_obj,
    573     drop_variables=drop_variables,
    574     **decoders,
    575     **kwargs,
    576 )
    577 ds = _dataset_from_backend_dataset(
    578     backend_ds,
    579     filename_or_obj,
   (...)
    589     **kwargs,
    590 )
    591 return ds

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/kerchunk/xarray_backend.py:12, in KerchunkBackend.open_dataset(self, filename_or_obj, storage_options, open_dataset_options, **kw)
      8 def open_dataset(
      9     self, filename_or_obj, *, storage_options=None, open_dataset_options=None, **kw
     10 ):
     11     open_dataset_options = (open_dataset_options or {}) | kw
---> 12     ref_ds = open_reference_dataset(
     13         filename_or_obj,
     14         storage_options=storage_options,
     15         open_dataset_options=open_dataset_options,
     16     )
     17     return ref_ds

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/kerchunk/xarray_backend.py:46, in open_reference_dataset(filename_or_obj, storage_options, open_dataset_options)
     42     open_dataset_options = {}
     44 m = fsspec.get_mapper("reference://", fo=filename_or_obj, **storage_options)
---> 46 return xr.open_dataset(m, engine="zarr", consolidated=False, **open_dataset_options)

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/backends/api.py:571, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    559 decoders = _resolve_decoders_kwargs(
    560     decode_cf,
    561     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    567     decode_coords=decode_coords,
    568 )
    570 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 571 backend_ds = backend.open_dataset(
    572     filename_or_obj,
    573     drop_variables=drop_variables,
    574     **decoders,
    575     **kwargs,
    576 )
    577 ds = _dataset_from_backend_dataset(
    578     backend_ds,
    579     filename_or_obj,
   (...)
    589     **kwargs,
    590 )
    591 return ds

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/backends/zarr.py:1182, in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, stacklevel, zarr_version, store, engine)
   1180 store_entrypoint = StoreBackendEntrypoint()
   1181 with close_on_error(store):
-> 1182     ds = store_entrypoint.open_dataset(
   1183         store,
   1184         mask_and_scale=mask_and_scale,
   1185         decode_times=decode_times,
   1186         concat_characters=concat_characters,
   1187         decode_coords=decode_coords,
   1188         drop_variables=drop_variables,
   1189         use_cftime=use_cftime,
   1190         decode_timedelta=decode_timedelta,
   1191     )
   1192 return ds

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/backends/store.py:43, in StoreBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
     29 def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
     30     self,
     31     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
   (...)
     39     decode_timedelta=None,
     40 ) -> Dataset:
     41     assert isinstance(filename_or_obj, AbstractDataStore)
---> 43     vars, attrs = filename_or_obj.load()
     44     encoding = filename_or_obj.get_encoding()
     46     vars, attrs, coord_names = conventions.decode_cf_variables(
     47         vars,
     48         attrs,
   (...)
     55         decode_timedelta=decode_timedelta,
     56     )

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/backends/common.py:221, in AbstractDataStore.load(self)
    199 def load(self):
    200     """
    201     This loads the variables and attributes simultaneously.
    202     A centralized loading function makes it easier to create
   (...)
    218     are requested, so care should be taken to make sure its fast.
    219     """
    220     variables = FrozenDict(
--> 221         (_decode_variable_name(k), v) for k, v in self.get_variables().items()
    222     )
    223     attributes = FrozenDict(self.get_attrs())
    224     return variables, attributes

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/backends/zarr.py:563, in ZarrStore.get_variables(self)
    562 def get_variables(self):
--> 563     return FrozenDict(
    564         (k, self.open_store_variable(k, v)) for k, v in self.zarr_group.arrays()
    565     )

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/core/utils.py:443, in FrozenDict(*args, **kwargs)
    442 def FrozenDict(*args, **kwargs) -> Frozen:
--> 443     return Frozen(dict(*args, **kwargs))

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/xarray/backends/zarr.py:563, in <genexpr>(.0)
    562 def get_variables(self):
--> 563     return FrozenDict(
    564         (k, self.open_store_variable(k, v)) for k, v in self.zarr_group.arrays()
    565     )

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/hierarchy.py:691, in Group._array_iter(self, keys_only, method, recurse)
    689 if contains_array(self._store, path):
    690     _key = key.rstrip("/")
--> 691     yield _key if keys_only else (_key, self[key])
    692 elif recurse and contains_group(self._store, path):
    693     group = self[key]

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/hierarchy.py:467, in Group.__getitem__(self, item)
    465 path = self._item_path(item)
    466 try:
--> 467     return Array(
    468         self._store,
    469         read_only=self._read_only,
    470         path=path,
    471         chunk_store=self._chunk_store,
    472         synchronizer=self._synchronizer,
    473         cache_attrs=self.attrs.cache,
    474         zarr_version=self._version,
    475         meta_array=self._meta_array,
    476     )
    477 except ArrayNotFoundError:
    478     pass

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/core.py:170, in Array.__init__(self, store, path, read_only, chunk_store, synchronizer, cache_metadata, cache_attrs, partial_decompress, write_empty_chunks, zarr_version, meta_array)
    167     self._metadata_key_suffix = self._hierarchy_metadata["metadata_key_suffix"]
    169 # initialize metadata
--> 170 self._load_metadata()
    172 # initialize attributes
    173 akey = _prefix_to_attrs_key(self._store, self._key_prefix)

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/core.py:193, in Array._load_metadata(self)
    191 """(Re)load metadata from store."""
    192 if self._synchronizer is None:
--> 193     self._load_metadata_nosync()
    194 else:
    195     mkey = _prefix_to_array_key(self._store, self._key_prefix)

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/core.py:207, in Array._load_metadata_nosync(self)
    204     raise ArrayNotFoundError(self._path) from e
    205 else:
    206     # decode and store metadata as instance members
--> 207     meta = self._store._metadata_class.decode_array_metadata(meta_bytes)
    208     self._meta = meta
    209     self._shape = meta["shape"]

File ~/miniconda3/envs/numpy2.0_released/lib/python3.11/site-packages/zarr/meta.py:141, in Metadata2.decode_array_metadata(cls, s)
    139         meta["dimension_separator"] = dimension_separator
    140 except Exception as e:
--> 141     raise MetadataError("error decoding metadata") from e
    142 else:
    143     return meta

MetadataError: error decoding metadata

At first I assumed there was something wrong with our handling of the loaded cftime_variables, but even if I drop the 'Time' variable entirely I still get exactly the same error:

vds = open_virtual_dataset(
    's3://wrf-se-ak-ar5/ccsm/rcp85/daily/2060/WRFDS_2060-01-01.nc',
    indexes={},
    drop_variables=['Time'],
)

I don't know why it's even trying to convert anything to a datetime; none of the other variables have units of time.

What's also weird is that the error is raised from within meta.py:260, in Metadata2.decode_fill_value(cls, v, dtype, object_codec), which suggests a problem with the fill_value. But I checked, and every variable in this virtual dataset has a fill_value of either a float or NaN in its .encoding; again, nothing datetime-related.
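
One way to narrow this down might be to look at the zarr metadata actually recorded in the reference file, since that's what Metadata2.decode_fill_value receives. A minimal sketch, assuming 'combined.json' follows the standard kerchunk v1 layout ({"version": 1, "refs": {...}}, with each .zarray entry stored as a JSON string):

import json

with open('combined.json') as f:
    refs = json.load(f)['refs']

# Print the dtype/fill_value pair from every array's zarr v2 metadata;
# these are exactly the values zarr passes to decode_fill_value.
for key, value in refs.items():
    if key.endswith('.zarray'):
        meta = json.loads(value)
        print(key, meta.get('dtype'), meta.get('fill_value'))

If any entry shows a datetime dtype (e.g. '<M8[ns]') paired with a fill_value it can't represent, that would explain where the datetime conversion is coming from.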
