-
Hi,

I'm writing a custom backend and trying to get the data loaded lazily as a dask array, but the result is not what I expect. Here is a minimal example:

```python
import h5py
from xarray.backends import BackendEntrypoint, BackendArray
from xarray import Dataset, DataArray, Variable
from xarray.core import indexing
import numpy as np

filename = "/tmp/testsgli.h5"
varname = "Lt_VN01"


def create_h5_data(filename, varname, shape):
    h5f = h5py.File(filename, mode="w")
    h5f[varname] = np.random.rand(*shape)
    h5f.close()


create_h5_data(filename, varname, (2000, 2000))


class H5Array(BackendArray):
    def __init__(self, array):
        self.shape = array.shape
        self.dtype = array.dtype
        self.array = array

    def __getitem__(self, key):
        return indexing.explicit_indexing_adapter(
            key, self.shape, indexing.IndexingSupport.BASIC, self._getitem
        )

    def _getitem(self, key):
        return self.array[key]


class SGLIBackend(BackendEntrypoint):
    def open_dataset(self, filename, *, drop_variables=None, **kwargs):
        ds = Dataset()
        h5f = h5py.File(filename)
        h5_arr = h5f["Lt_VN01"]
        ds["Lt_VN01"] = Variable(
            ["y", "x"],
            indexing.LazilyIndexedArray(H5Array(h5_arr)),
            encoding={"preferred_chunks": h5_arr.chunks},
        )
        return ds
print(SGLIBackend().open_dataset(filename)[varname].data)
```

As you can see, the result is not a dask array.

A side question: I see that the existing engines use a datastore in their source; where can I find documentation on how to use it in the context of a backend?
-
As far as I can tell, you're not supposed to handle `dask` in the backend; this will be taken care of by `open_dataset`. As such, I think you should change the last line to `xr.open_dataset(filename, engine=SGLIBackend, chunks={})`, but that raises an `AttributeError`:

Traceback

```
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [1], in <cell line: 49>()
41 ds["Lt_VN01"] = Variable(
42 ["y", "x"],
43 indexing.LazilyIndexedArray(H5Array(h5_arr)),
44 encoding={"preferred_chunks": h5_arr.chunks},
45 )
46 return ds
---> 49 xr.open_dataset(filename, engine=SGLIBackend, chunks={})
File .../xarray/backends/api.py:545, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
538 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
539 backend_ds = backend.open_dataset(
540 filename_or_obj,
541 drop_variables=drop_variables,
542 **decoders,
543 **kwargs,
544 )
--> 545 ds = _dataset_from_backend_dataset(
546 backend_ds,
547 filename_or_obj,
548 engine,
549 chunks,
550 cache,
551 overwrite_encoded_chunks,
552 inline_array,
553 drop_variables=drop_variables,
554 **decoders,
555 **kwargs,
556 )
557 return ds
File .../xarray/backends/api.py:357, in _dataset_from_backend_dataset(backend_ds, filename_or_obj, engine, chunks, cache, overwrite_encoded_chunks, inline_array, **extra_tokens)
355 ds = backend_ds
356 else:
--> 357 ds = _chunk_ds(
358 backend_ds,
359 filename_or_obj,
360 engine,
361 chunks,
362 overwrite_encoded_chunks,
363 inline_array,
364 **extra_tokens,
365 )
367 ds.set_close(backend_ds._close)
369 # Ensure source filename always stored in dataset object
File .../xarray/backends/api.py:325, in _chunk_ds(backend_ds, filename_or_obj, engine, chunks, overwrite_encoded_chunks, inline_array, **extra_tokens)
323 variables = {}
324 for name, var in backend_ds.variables.items():
--> 325 var_chunks = _get_chunk(var, chunks)
326 variables[name] = _maybe_chunk(
327 name,
328 var,
(...)
333 inline_array=inline_array,
334 )
335 return backend_ds._replace(variables)
File .../xarray/core/dataset.py:211, in _get_chunk(var, chunks)
209 # Determine the explicit requested chunks.
210 preferred_chunks = var.encoding.get("preferred_chunks", {})
--> 211 preferred_chunk_shape = tuple(
212 preferred_chunks.get(dim, size) for dim, size in zip(dims, shape)
213 )
214 if isinstance(chunks, Number) or (chunks == "auto"):
215 chunks = dict.fromkeys(dims, chunks)
File .../xarray/core/dataset.py:212, in <genexpr>(.0)
209 # Determine the explicit requested chunks.
210 preferred_chunks = var.encoding.get("preferred_chunks", {})
211 preferred_chunk_shape = tuple(
--> 212 preferred_chunks.get(dim, size) for dim, size in zip(dims, shape)
213 )
214 if isinstance(chunks, Number) or (chunks == "auto"):
215 chunks = dict.fromkeys(dims, chunks)
AttributeError: 'NoneType' object has no attribute 'get'
```

which means that `encoding["preferred_chunks"]` has to be a mapping from dimension name to chunk size. In this case `h5_arr.chunks` is `None` (the file was created without chunking), so the simplest fix is to pass an empty dict.
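As a side note (my sketch, not part of the original reply, and it reuses `h5_arr`, `Variable`, and `H5Array` from the example above): if the HDF5 dataset had been created with chunking, one plausible way to fill in `preferred_chunks` is to zip the dimension names with `h5_arr.chunks`, since xarray looks the value up per dimension name:

```python
# Sketch only: h5py's .chunks is a tuple such as (500, 500) for chunked
# datasets and None for contiguous ones, so fall back to an empty dict.
dims = ["y", "x"]
preferred_chunks = dict(zip(dims, h5_arr.chunks)) if h5_arr.chunks else {}

ds["Lt_VN01"] = Variable(
    dims,
    indexing.LazilyIndexedArray(H5Array(h5_arr)),
    encoding={"preferred_chunks": preferred_chunks},
)
```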
Working example

```
In [1]: import h5py
   ...: from xarray.backends import BackendEntrypoint, BackendArray
   ...: from xarray import Dataset, DataArray, Variable
   ...: from xarray.core import indexing
   ...: import xarray as xr
   ...: import numpy as np
   ...:
   ...: filename = "/tmp/testsgli.h5"
   ...: varname = "Lt_VN01"
   ...:
   ...:
   ...: def create_h5_data(filename, varname, shape):
   ...:     h5f = h5py.File(filename, mode="w")
   ...:     h5f[varname] = np.random.rand(*shape)
   ...:     h5f.close()
   ...:
   ...:
   ...: create_h5_data(filename, varname, (2000, 2000))
   ...:
   ...:
   ...: class H5Array(BackendArray):
   ...:     def __init__(self, array):
   ...:         self.shape = array.shape
   ...:         self.dtype = array.dtype
   ...:         self.array = array
   ...:
   ...:     def __getitem__(self, key):
   ...:         return indexing.explicit_indexing_adapter(
   ...:             key, self.shape, indexing.IndexingSupport.BASIC, self._getitem
   ...:         )
   ...:
   ...:     def _getitem(self, key):
   ...:         return self.array[key]
   ...:
   ...:
   ...: class SGLIBackend(BackendEntrypoint):
   ...:     def open_dataset(self, filename, *, drop_variables=None, **kwargs):
   ...:         ds = Dataset()
   ...:         h5f = h5py.File(filename)
   ...:         h5_arr = h5f["Lt_VN01"]
   ...:         ds["Lt_VN01"] = Variable(
   ...:             ["y", "x"],
   ...:             indexing.LazilyIndexedArray(H5Array(h5_arr)),
   ...:             encoding={"preferred_chunks": {}},
   ...:         )
   ...:         return ds
   ...:
   ...:
   ...: xr.open_dataset(filename, engine=SGLIBackend, chunks={})
Out[1]:
<xarray.Dataset>
Dimensions: (y: 2000, x: 2000)
Dimensions without coordinates: y, x
Data variables:
    Lt_VN01  (y, x) float64 dask.array<chunksize=(2000, 2000), meta=np.ndarray>
```

As mentioned in one of the PRs that introduced the custom backends, the datastore is an implementation detail and can be removed at any moment (hence the lack of documentation).
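To round off the chunking point above (again my addition, not from the reply, and it assumes the `SGLIBackend` from the working example): explicit chunk sizes can also be requested through the `chunks` argument of `open_dataset`; the backend itself never needs to know about dask.

```python
# Sketch only: chunk sizes are requested at open time and xarray wraps the
# lazily indexed backend array in a dask array with those chunks.
ds = xr.open_dataset(filename, engine=SGLIBackend, chunks={"y": 500, "x": 500})
print(ds["Lt_VN01"].data)  # dask.array<..., chunksize=(500, 500), meta=np.ndarray>
```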
-
Does https://tutorial.xarray.dev/advanced/backends/2.Backend_with_Lazy_Loading.html help? (suggestions and PRs to improve docs and tutorials are always welcome)