From eeb3004729674d3f1cd3fe93b9db8964c884cbe0 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Tue, 3 Jun 2025 17:35:43 +0200 Subject: [PATCH 01/60] create a xarray healpix moc index --- xdggs/healpix.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 2da42bf5..f10f57b7 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -12,6 +12,7 @@ import cdshealpix.ring import numpy as np import xarray as xr +from healpix_geo.nested import RangeMOCIndex from xarray.indexes import PandasIndex from xdggs.grid import DGGSInfo, translate_parameters @@ -309,6 +310,51 @@ def cell_boundaries(self, cell_ids: Any, backend="shapely") -> np.ndarray: return backend_func(vertices) +class HealpixMocIndex(xr.Index): + def __init__(self, index, *, dim, name, grid_info): + self._index = index + self._dim = dim + self._grid_info = grid_info + self._name = name + + @classmethod + def from_array(cls, array, *, dim, name, grid_info): + index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) + return cls(index, dim=dim, name=name, grid_info=grid_info) + + def _replace(self, index): + return type(self)( + index, dim=self._dim, name=self._name, grid_info=self._grid_info + ) + + @classmethod + def from_variables(cls, variables, *, options): + name, var, dim = _extract_cell_id_variable(variables) + grid_info = HealpixInfo.from_dict(var.attrs | options) + + return cls.from_array(var.data, dim=dim, name=name, grid_info=grid_info) + + def create_variables(self, variables): + name = self._name + if variables is not None and name in variables: + var = variables[name] + attrs = var.attrs + encoding = var.encoding + else: + attrs = None + encoding = None + + var = xr.Variable( + self._dim, self._index.cell_ids(), attrs=attrs, encoding=encoding + ) + return {name: var} + + def isel(self, indexers): + indexer = indexers[self._dim] + + return self._replace(self._index.isel(indexer)) + + @register_dggs("healpix") class HealpixIndex(DGGSIndex): def __init__( From 900546cf9838ae26f1b84350ddf0eb698bb31af6 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Tue, 3 Jun 2025 17:37:00 +0200 Subject: [PATCH 02/60] hack to allow creating a moc index --- xdggs/accessor.py | 8 ++++++-- xdggs/h3.py | 1 + xdggs/healpix.py | 23 +++++++++++++++++++---- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/xdggs/accessor.py b/xdggs/accessor.py index 1852c981..0283a637 100644 --- a/xdggs/accessor.py +++ b/xdggs/accessor.py @@ -29,7 +29,9 @@ def __init__(self, obj: xr.Dataset | xr.DataArray): self._name = name self._index = index - def decode(self, grid_info=None, *, name="cell_ids") -> xr.Dataset | xr.DataArray: + def decode( + self, grid_info=None, *, name="cell_ids", index_kind="pandas" + ) -> xr.Dataset | xr.DataArray: """decode the DGGS cell ids Parameters @@ -51,7 +53,9 @@ def decode(self, grid_info=None, *, name="cell_ids") -> xr.Dataset | xr.DataArra if isinstance(grid_info, dict): var.attrs = grid_info - return self._obj.drop_indexes(name, errors="ignore").set_xindex(name, DGGSIndex) + return self._obj.drop_indexes(name, errors="ignore").set_xindex( + name, DGGSIndex, index_kind=index_kind + ) @property def index(self) -> DGGSIndex: diff --git a/xdggs/h3.py b/xdggs/h3.py index 0a5b103c..ca3de365 100644 --- a/xdggs/h3.py +++ b/xdggs/h3.py @@ -223,6 +223,7 @@ def from_variables( ) -> "H3Index": _, var, dim = _extract_cell_id_variable(variables) + options.pop("index_kind", None) grid_info = H3Info.from_dict(var.attrs | options) return cls(var.data, dim, grid_info) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index f10f57b7..fba7f1e0 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -362,11 +362,24 @@ def __init__( cell_ids: Any | PandasIndex, dim: str, grid_info: DGGSInfo, + index_kind: str = "pandas", ): if not isinstance(grid_info, HealpixInfo): raise ValueError(f"grid info object has an invalid type: {type(grid_info)}") - super().__init__(cell_ids, dim, grid_info) + self._dim = dim + + if isinstance(cell_ids, xr.Index): + self._pd_index = cell_ids + elif index_kind == "pandas": + self._pd_index = PandasIndex(cell_ids, dim) + elif index_kind == "moc": + self._pd_index = HealpixMocIndex.from_array( + cell_ids, dim=dim, grid_info=grid_info, name="cell_ids" + ) + self._kind = index_kind + + self._grid = grid_info @classmethod def from_variables( @@ -377,16 +390,18 @@ def from_variables( ) -> "HealpixIndex": _, var, dim = _extract_cell_id_variable(variables) + index_kind = options.pop("index_kind", "pandas") + grid_info = HealpixInfo.from_dict(var.attrs | options) - return cls(var.data, dim, grid_info) + return cls(var.data, dim, grid_info, index_kind=index_kind) def _replace(self, new_pd_index: PandasIndex): - return type(self)(new_pd_index, self._dim, self._grid) + return type(self)(new_pd_index, self._dim, self._grid, index_kind=self._kind) @property def grid_info(self) -> HealpixInfo: return self._grid def _repr_inline_(self, max_width: int): - return f"HealpixIndex(level={self._grid.level}, indexing_scheme={self._grid.indexing_scheme})" + return f"HealpixIndex(level={self._grid.level}, indexing_scheme={self._grid.indexing_scheme}, kind={self._kind})" From fbc7154a674b4950bcc47d67ee905d9022cc119e Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 11:30:00 +0200 Subject: [PATCH 03/60] depend on `healpix-geo` --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 94f0f46d..bb384f28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ requires-python = ">=3.10" dependencies = [ "xarray", "cdshealpix", + "healpix-geo>=0.0.3", "h3ronpy", "typing-extensions", "lonboard>=0.9.3", From 017d1d050e41ff1beefe54239896b883298e7eaf Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 11:30:20 +0200 Subject: [PATCH 04/60] install `healpix-geo` from PyPI --- ci/environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/environment.yml b/ci/environment.yml index b5f2eb23..e7252e00 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -18,3 +18,4 @@ dependencies: - pip: - h3ronpy - cdshealpix + - healpix-geo From d197dfacc95f59bad8791981fbd7bc81209f342c Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 12:23:35 +0200 Subject: [PATCH 05/60] install `healpix-geo` in the docs --- ci/docs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/docs.yml b/ci/docs.yml index 8b6f7a2a..63a0162d 100644 --- a/ci/docs.yml +++ b/ci/docs.yml @@ -29,4 +29,5 @@ dependencies: - cdshealpix - pip - pip: + - healpix-geo - -e .. From 4246fac96eaba865e66a964e2a4c949d10414d89 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 17:12:12 +0200 Subject: [PATCH 06/60] use a special `values` method to access the actual data --- xdggs/healpix.py | 6 ++++++ xdggs/index.py | 7 +++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index fba7f1e0..557a914d 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -381,6 +381,12 @@ def __init__( self._grid = grid_info + def values(self): + if self._kind == "moc": + return self._pd_index._index.cell_ids() + else: + return self._pd_index.index.values + @classmethod def from_variables( cls: type["HealpixIndex"], diff --git a/xdggs/index.py b/xdggs/index.py index b0f71f28..d02232e0 100644 --- a/xdggs/index.py +++ b/xdggs/index.py @@ -67,6 +67,9 @@ def from_variables( return cls.from_variables(variables, options=options) + def values(self): + return self._pd_index.index.values + def create_variables( self, variables: Mapping[Any, xr.Variable] | None = None ) -> dict[Hashable, xr.Variable]: @@ -90,10 +93,10 @@ def _replace(self, new_pd_index: PandasIndex): raise NotImplementedError() def cell_centers(self) -> tuple[np.ndarray, np.ndarray]: - return self._grid.cell_ids2geographic(self._pd_index.index.values) + return self._grid.cell_ids2geographic(self.values()) def cell_boundaries(self) -> np.ndarray: - return self.grid_info.cell_boundaries(self._pd_index.index.values) + return self.grid_info.cell_boundaries(self.values()) @property def grid_info(self) -> DGGSInfo: From 2b5670babbb01b4fb39781aed0802978706959a9 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 17:13:04 +0200 Subject: [PATCH 07/60] only support `"nested"` in the MOC index --- xdggs/healpix.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 557a914d..edf87ca7 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -319,6 +319,11 @@ def __init__(self, index, *, dim, name, grid_info): @classmethod def from_array(cls, array, *, dim, name, grid_info): + if grid_info.indexing_scheme != "nested": + raise ValueError( + "The MOC index currently only supports the 'nested' scheme" + ) + index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) return cls(index, dim=dim, name=name, grid_info=grid_info) From 3b43dea5480f2abd4ca7f26c79c8695af3dea3d6 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 17:14:15 +0200 Subject: [PATCH 08/60] special-case the full domain --- xdggs/healpix.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index edf87ca7..f902699a 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -325,6 +325,11 @@ def from_array(cls, array, *, dim, name, grid_info): ) index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) + + if array.size == 12 * 4**grid_info.level: + index = RangeMOCIndex.full_domain(grid_info.level) + else: + index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) return cls(index, dim=dim, name=name, grid_info=grid_info) def _replace(self, index): From 86c0a44ebb92b4b972bd90c1e22eb91e882a5376 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 17:31:22 +0200 Subject: [PATCH 09/60] remove the duplicate index creation --- xdggs/healpix.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index f902699a..a8d26bad 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -324,8 +324,6 @@ def from_array(cls, array, *, dim, name, grid_info): "The MOC index currently only supports the 'nested' scheme" ) - index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) - if array.size == 12 * 4**grid_info.level: index = RangeMOCIndex.full_domain(grid_info.level) else: From 19fe087dae0b047bf19596fc9bdeacf3b36d87c0 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 5 Jun 2025 17:31:41 +0200 Subject: [PATCH 10/60] forward to the wrapped index' `create_variables` --- xdggs/healpix.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index a8d26bad..be7aca79 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -410,6 +410,9 @@ def from_variables( return cls(var.data, dim, grid_info, index_kind=index_kind) + def create_variables(self, variables): + return self._pd_index.create_variables(variables) + def _replace(self, new_pd_index: PandasIndex): return type(self)(new_pd_index, self._dim, self._grid, index_kind=self._kind) From 33157452014f41a1c0fb3a1ce9c772374b6b439f Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 6 Jun 2025 11:49:47 +0200 Subject: [PATCH 11/60] dask support in `create_variables` --- xdggs/healpix.py | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index be7aca79..a14281bf 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -310,6 +310,24 @@ def cell_boundaries(self, cell_ids: Any, backend="shapely") -> np.ndarray: return backend_func(vertices) +def construct_chunk_ranges(chunks, until): + start = 0 + + for chunksize in chunks: + stop = start + chunksize + if stop > until: + stop = until + if start == stop: + break + + yield chunksize, slice(start, stop) + start = stop + + +def extract_chunk(index, slice_): + return index.isel(slice_).cell_ids() + + class HealpixMocIndex(xr.Index): def __init__(self, index, *, dim, name, grid_info): self._index = index @@ -348,13 +366,35 @@ def create_variables(self, variables): var = variables[name] attrs = var.attrs encoding = var.encoding + chunks = var.chunksizes.get(self._dim) else: attrs = None encoding = None + chunks = None + + if chunks is not None: + import dask + import dask.array as da + + chunk_arrays = [ + da.from_delayed( + dask.delayed(extract_chunk)(self._index, slice_), + shape=(chunksize,), + dtype="uint64", + name=f"chunk-{index}", + meta=np.array((), dtype="uint64"), + ) + for index, (chunksize, slice_) in enumerate( + construct_chunk_ranges(chunks, self._index.size) + ) + ] + data = da.concatenate(chunk_arrays, axis=0) + var = xr.Variable(self._dim, data, attrs=attrs, encoding=encoding) + else: + var = xr.Variable( + self._dim, self._index.cell_ids(), attrs=attrs, encoding=encoding + ) - var = xr.Variable( - self._dim, self._index.cell_ids(), attrs=attrs, encoding=encoding - ) return {name: var} def isel(self, indexers): From d8b327afd0858352c12b1cc396227642416eddaf Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 6 Jun 2025 14:44:04 +0200 Subject: [PATCH 12/60] support `dask` in `from_cell_ids` (this doesn't work yet because `moc::RangeMOC` does not derive from `serde`'s `Serialize` and `Deserialize`) --- xdggs/healpix.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index a14281bf..91e7a287 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -22,6 +22,13 @@ T = TypeVar("T") +try: + import dask.array as da + + dask_array_type = (da.Array,) +except ImportError: + dask_array_type = () + def polygons_shapely(vertices): import shapely @@ -328,6 +335,27 @@ def extract_chunk(index, slice_): return index.isel(slice_).cell_ids() +def partition_chunks(chunks, n_partitions): + import dask + import dask.bag as db + + def _construct_slices(size, n_partitions): + partition_size = -(-size // n_partitions) + start = 0 + for _ in range(n_partitions): + stop = start + partition_size + if stop > size: + stop = size + + yield slice(start, stop) + start = stop + + return db.from_delayed( + dask.delayed(list)(chunks[slice_]) + for slice_ in _construct_slices(len(chunks), n_partitions) + ) + + class HealpixMocIndex(xr.Index): def __init__(self, index, *, dim, name, grid_info): self._index = index @@ -336,7 +364,7 @@ def __init__(self, index, *, dim, name, grid_info): self._name = name @classmethod - def from_array(cls, array, *, dim, name, grid_info): + def from_array(cls, array, *, dim, name, grid_info, n_partitions=100): if grid_info.indexing_scheme != "nested": raise ValueError( "The MOC index currently only supports the 'nested' scheme" @@ -344,6 +372,17 @@ def from_array(cls, array, *, dim, name, grid_info): if array.size == 12 * 4**grid_info.level: index = RangeMOCIndex.full_domain(grid_info.level) + elif isinstance(array, dask_array_type): + import dask + + indexes = [ + dask.delayed(RangeMOCIndex.from_cell_ids)(grid_info.level, chunk) + for chunk in array.astype("uint64").to_delayed() + ] + bag = partition_chunks(indexes, n_partitions=n_partitions) + index = bag.accumulate( + lambda index1, index2: index1.union(index2) + ).compute() else: index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) return cls(index, dim=dim, name=name, grid_info=grid_info) From c88f0c3304d034054317f46a078c1f1f5cd6f936 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 15:47:27 +0200 Subject: [PATCH 13/60] wrap the chunked delayed construction in a list --- xdggs/healpix.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 91e7a287..ba260a3a 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -351,8 +351,10 @@ def _construct_slices(size, n_partitions): start = stop return db.from_delayed( - dask.delayed(list)(chunks[slice_]) - for slice_ in _construct_slices(len(chunks), n_partitions) + [ + dask.delayed(lambda *args: args)(*chunks[slice_]) + for slice_ in _construct_slices(len(chunks), n_partitions) + ] ) From ffbb4e9fffc7432295356e6172f4ed3365c0cfdd Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 15:48:21 +0200 Subject: [PATCH 14/60] replace the dask loading code with a error message --- xdggs/healpix.py | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index ba260a3a..a67a6653 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -335,29 +335,6 @@ def extract_chunk(index, slice_): return index.isel(slice_).cell_ids() -def partition_chunks(chunks, n_partitions): - import dask - import dask.bag as db - - def _construct_slices(size, n_partitions): - partition_size = -(-size // n_partitions) - start = 0 - for _ in range(n_partitions): - stop = start + partition_size - if stop > size: - stop = size - - yield slice(start, stop) - start = stop - - return db.from_delayed( - [ - dask.delayed(lambda *args: args)(*chunks[slice_]) - for slice_ in _construct_slices(len(chunks), n_partitions) - ] - ) - - class HealpixMocIndex(xr.Index): def __init__(self, index, *, dim, name, grid_info): self._index = index @@ -375,16 +352,11 @@ def from_array(cls, array, *, dim, name, grid_info, n_partitions=100): if array.size == 12 * 4**grid_info.level: index = RangeMOCIndex.full_domain(grid_info.level) elif isinstance(array, dask_array_type): - import dask - - indexes = [ - dask.delayed(RangeMOCIndex.from_cell_ids)(grid_info.level, chunk) - for chunk in array.astype("uint64").to_delayed() - ] - bag = partition_chunks(indexes, n_partitions=n_partitions) - index = bag.accumulate( - lambda index1, index2: index1.union(index2) - ).compute() + raise NotImplementedError( + "Creating a HealpixMocIndex from chunked cell ids is not supported, yet." + " For now, please manually compute the array (if possible) and construct" + " the index from in-memory cell ids." + ) else: index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) return cls(index, dim=dim, name=name, grid_info=grid_info) From c7691345c75e0e0169065beb7c2f8878337cac75 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 15:58:09 +0200 Subject: [PATCH 15/60] out-of-core implement the RangeMOCIndex --- xdggs/healpix.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index a67a6653..88373d4c 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -352,11 +352,15 @@ def from_array(cls, array, *, dim, name, grid_info, n_partitions=100): if array.size == 12 * 4**grid_info.level: index = RangeMOCIndex.full_domain(grid_info.level) elif isinstance(array, dask_array_type): - raise NotImplementedError( - "Creating a HealpixMocIndex from chunked cell ids is not supported, yet." - " For now, please manually compute the array (if possible) and construct" - " the index from in-memory cell ids." + from functools import reduce + + import dask + + [indexes] = dask.compute( + dask.delayed(RangeMOCIndex.from_cell_ids)(grid_info.level, chunk) + for chunk in array.astype("uint64").to_delayed() ) + index = reduce(RangeMOCIndex.union, indexes) else: index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) return cls(index, dim=dim, name=name, grid_info=grid_info) From 4d95020f6f3b95663a44e727c690a6c7dee391b0 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:09:36 +0200 Subject: [PATCH 16/60] remove the `n_partitions` parameter for now --- xdggs/healpix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 88373d4c..87c0f1ec 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -343,7 +343,7 @@ def __init__(self, index, *, dim, name, grid_info): self._name = name @classmethod - def from_array(cls, array, *, dim, name, grid_info, n_partitions=100): + def from_array(cls, array, *, dim, name, grid_info): if grid_info.indexing_scheme != "nested": raise ValueError( "The MOC index currently only supports the 'nested' scheme" From ea42c6e68a940b3788eadab172cd197c6c65e58c Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:21:13 +0200 Subject: [PATCH 17/60] forward the `size` and `nbytes` properties --- xdggs/healpix.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 87c0f1ec..40d8352c 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -342,6 +342,14 @@ def __init__(self, index, *, dim, name, grid_info): self._grid_info = grid_info self._name = name + @property + def size(self): + return self._index.size + + @property + def nbytes(self): + return self._index.nbytes + @classmethod def from_array(cls, array, *, dim, name, grid_info): if grid_info.indexing_scheme != "nested": From e8b689c5da1ca44850ae82ea36a2d9bb32159161 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:23:06 +0200 Subject: [PATCH 18/60] check that construction from an array works properly --- xdggs/tests/test_healpix.py | 57 ++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index bc75eef7..d88635a9 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -12,7 +12,13 @@ from xarray.core.indexes import PandasIndex from xdggs import healpix -from xdggs.tests import assert_exceptions_equal, geoarrow_to_shapely +from xdggs.tests import ( + assert_exceptions_equal, + da, + geoarrow_to_shapely, + raise_if_dask_computes, + requires_dask, +) try: ExceptionGroup @@ -526,3 +532,52 @@ def test_repr_inline(level, max_width) -> None: assert f"level={level}" in actual # ignore max_width for now # assert len(actual) <= max_width + + +class TestHealpixMocIndex: + @pytest.mark.parametrize( + ["depth", "cell_ids", "max_computes"], + ( + pytest.param( + 2, np.arange(12 * 4**2, dtype="uint64"), 1, id="numpy-2-full_domain" + ), + pytest.param( + 2, + np.arange(3 * 4**2, 5 * 4**2, dtype="uint64"), + 1, + id="numpy-2-region", + ), + pytest.param( + 10, + da.arange(12 * 4**10, chunks=(4**6,), dtype="uint64"), + 0, + marks=requires_dask, + id="dask-10-full_domain", + ), + pytest.param( + 15, + da.arange(12 * 4**15, chunks=(4**10,), dtype="uint64"), + 0, + marks=requires_dask, + id="dask-15-full_domain", + ), + pytest.param( + 10, + da.arange(3 * 4**10, 5 * 4**10, chunks=(4**6,), dtype="uint64"), + 1, + marks=requires_dask, + id="dask-10-region", + ), + ), + ) + def test_from_array(self, depth, cell_ids, max_computes): + grid_info = healpix.HealpixInfo(level=depth, indexing_scheme="nested") + + with raise_if_dask_computes(max_computes=max_computes): + index = healpix.HealpixMocIndex.from_array( + cell_ids, dim="cells", name="cell_ids", grid_info=grid_info + ) + + assert isinstance(index, healpix.HealpixMocIndex) + assert index.size == cell_ids.size + assert index.nbytes == 16 From b3bea15fb64a94939fdd01df435e53978bb94e78 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:23:35 +0200 Subject: [PATCH 19/60] dask testing utils --- xdggs/tests/__init__.py | 47 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/xdggs/tests/__init__.py b/xdggs/tests/__init__.py index 1a193487..683f18e4 100644 --- a/xdggs/tests/__init__.py +++ b/xdggs/tests/__init__.py @@ -1,4 +1,7 @@ +from contextlib import nullcontext + import geoarrow.pyarrow as ga +import pytest import shapely from xdggs.tests.matchers import ( # noqa: F401 @@ -7,6 +10,50 @@ assert_exceptions_equal, ) +try: + import dask + import dask.array as da + + has_dask = True +except ImportError: + dask = None + + class da: + @staticmethod + def arange(*args, **kwargs): + pass + + has_dask = False + + +class CountingScheduler: + """Simple dask scheduler counting the number of computes. + + Reference: https://stackoverflow.com/questions/53289286/""" + + def __init__(self, max_computes=0): + self.total_computes = 0 + self.max_computes = max_computes + + def __call__(self, dsk, keys, **kwargs): + self.total_computes += 1 + if self.total_computes > self.max_computes: + raise RuntimeError( + f"Too many computes. Total: {self.total_computes} > max: {self.max_computes}." + ) + return dask.get(dsk, keys, **kwargs) + + +requires_dask = pytest.mark.skipif(not has_dask, reason="requires dask") + def geoarrow_to_shapely(arr): return shapely.from_wkb(ga.as_wkb(arr)) + + +def raise_if_dask_computes(max_computes=0): + # return a dummy context manager so that this can be used for non-dask objects + if not has_dask: + return nullcontext() + scheduler = CountingScheduler(max_computes) + return dask.config.set(scheduler=scheduler) From 680ad6449a98af9c3b71fbc55ddefd4b307c3344 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:23:49 +0200 Subject: [PATCH 20/60] remove the unused `unique` scheme for now --- xdggs/healpix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 40d8352c..c92002a8 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -120,12 +120,12 @@ class HealpixInfo(DGGSInfo): level: int """int : The hierarchical level of the grid""" - indexing_scheme: Literal["nested", "ring", "unique"] = "nested" + indexing_scheme: Literal["nested", "ring"] = "nested" """int : The indexing scheme of the grid""" valid_parameters: ClassVar[dict[str, Any]] = { "level": range(0, 29 + 1), - "indexing_scheme": ["nested", "ring", "unique"], + "indexing_scheme": ["nested", "ring"], } def __post_init__(self): From e0353a5b86e1f8d6ba1099b6fbf2d19379b72a0d Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:26:47 +0200 Subject: [PATCH 21/60] typo --- docs/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 39283937..83a23dfe 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -8,7 +8,7 @@ ### Documentation -- Documentation Contributer Guide + Github Button ({pull}`137`) +- Documentation Contributor's Guide + Github Button ({pull}`137`) ### Internal changes From 15e763358f3a59f9a382060bf804046bd9072049 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:28:21 +0200 Subject: [PATCH 22/60] changelog --- docs/changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 83a23dfe..a99ee0d2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -4,6 +4,8 @@ ### New features +- memory-efficient index implementation based on multi-order coverage maps (MOCs) ({pull}`151`) + ### Bug fixes ### Documentation From 179ae5a3daf0a31b0a800e6610342e73c3eddf88 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:33:45 +0200 Subject: [PATCH 23/60] treat `decode` as a method --- docs/api.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index efb0f280..2122b206 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -38,6 +38,11 @@ Parameters Dataset.dggs.grid_info Dataset.dggs.params + +.. autosummary:: + :toctree: generated + :template: autosummary/accessor_method.rst + Dataset.dggs.decode @@ -62,7 +67,12 @@ Parameters DataArray.dggs.grid_info DataArray.dggs.params - DataArray.dggs.decode + +.. autosummary:: + :toctree: generated + :template: autosummary/accessor_method.rst + + Dataset.dggs.decode Data inference From 2ef036c5d81974f86f83185a6f6b6f2673eb9a43 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:36:47 +0200 Subject: [PATCH 24/60] document the `index_kind` parameter --- xdggs/accessor.py | 3 +++ xdggs/index.py | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/xdggs/accessor.py b/xdggs/accessor.py index 0283a637..fd05fefe 100644 --- a/xdggs/accessor.py +++ b/xdggs/accessor.py @@ -41,6 +41,9 @@ def decode( the dataset. name : str, default: "cell_ids" The name of the coordinate containing the cell ids. + index_kind : str, default: "pandas" + The kind of index to use. This is implementation-dependent, with all + implementations supporting at least in-memory indexes (``"pandas"``). Returns ------- diff --git a/xdggs/index.py b/xdggs/index.py index d02232e0..fdd0e634 100644 --- a/xdggs/index.py +++ b/xdggs/index.py @@ -9,7 +9,7 @@ from xdggs.utils import GRID_REGISTRY, _extract_cell_id_variable -def decode(ds, grid_info=None, *, name="cell_ids"): +def decode(ds, grid_info=None, *, name="cell_ids", index_kind="pandas"): """ decode grid parameters and create a DGGS index @@ -23,6 +23,9 @@ def decode(ds, grid_info=None, *, name="cell_ids"): the dataset. name : str, default: "cell_ids" The name of the coordinate containing the cell ids. + index_kind : str, default: "pandas" + The kind of index to use. This is implementation-dependent, with all + implementations supporting at least in-memory indexes (``"pandas"``). Returns ------- @@ -34,7 +37,7 @@ def decode(ds, grid_info=None, *, name="cell_ids"): xarray.Dataset.dggs.decode xarray.DataArray.dggs.decode """ - return ds.dggs.decode(name=name, grid_info=grid_info) + return ds.dggs.decode(name=name, grid_info=grid_info, index_kind="pandas") class DGGSIndex(Index): From bde552ddbebecf2be31a4851c0b377a64087a8ca Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:37:09 +0200 Subject: [PATCH 25/60] pass through the `index_kind` parameter --- xdggs/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xdggs/index.py b/xdggs/index.py index fdd0e634..a3d97b7f 100644 --- a/xdggs/index.py +++ b/xdggs/index.py @@ -37,7 +37,7 @@ def decode(ds, grid_info=None, *, name="cell_ids", index_kind="pandas"): xarray.Dataset.dggs.decode xarray.DataArray.dggs.decode """ - return ds.dggs.decode(name=name, grid_info=grid_info, index_kind="pandas") + return ds.dggs.decode(name=name, grid_info=grid_info, index_kind=index_kind) class DGGSIndex(Index): From 19ca9d9ee20462d654b5a18716101ea9120e827e Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 11 Jun 2025 16:40:51 +0200 Subject: [PATCH 26/60] rename `_pd_index` to `_index` --- xdggs/h3.py | 7 +++---- xdggs/healpix.py | 18 +++++++++--------- xdggs/index.py | 20 ++++++++++---------- xdggs/tests/test_h3.py | 10 +++++----- xdggs/tests/test_healpix.py | 8 ++++---- 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/xdggs/h3.py b/xdggs/h3.py index ca3de365..d0831093 100644 --- a/xdggs/h3.py +++ b/xdggs/h3.py @@ -23,7 +23,6 @@ cells_to_wkb_polygons, coordinates_to_cells, ) -from xarray.indexes import PandasIndex from xdggs.grid import DGGSInfo, translate_parameters from xdggs.index import DGGSIndex @@ -208,7 +207,7 @@ class H3Index(DGGSIndex): def __init__( self, - cell_ids: Any | PandasIndex, + cell_ids: Any | xr.Index, dim: str, grid_info: DGGSInfo, ): @@ -232,8 +231,8 @@ def from_variables( def grid_info(self) -> H3Info: return self._grid - def _replace(self, new_pd_index: PandasIndex): - return type(self)(new_pd_index, self._dim, self._grid) + def _replace(self, new_index: xr.Index): + return type(self)(new_index, self._dim, self._grid) def _repr_inline_(self, max_width: int): return f"H3Index(level={self._grid.level})" diff --git a/xdggs/healpix.py b/xdggs/healpix.py index c92002a8..564d9923 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -432,7 +432,7 @@ def isel(self, indexers): class HealpixIndex(DGGSIndex): def __init__( self, - cell_ids: Any | PandasIndex, + cell_ids: Any | xr.Index, dim: str, grid_info: DGGSInfo, index_kind: str = "pandas", @@ -443,11 +443,11 @@ def __init__( self._dim = dim if isinstance(cell_ids, xr.Index): - self._pd_index = cell_ids + self._index = cell_ids elif index_kind == "pandas": - self._pd_index = PandasIndex(cell_ids, dim) + self._index = PandasIndex(cell_ids, dim) elif index_kind == "moc": - self._pd_index = HealpixMocIndex.from_array( + self._index = HealpixMocIndex.from_array( cell_ids, dim=dim, grid_info=grid_info, name="cell_ids" ) self._kind = index_kind @@ -456,9 +456,9 @@ def __init__( def values(self): if self._kind == "moc": - return self._pd_index._index.cell_ids() + return self._index._index.cell_ids() else: - return self._pd_index.index.values + return self._index.index.values @classmethod def from_variables( @@ -476,10 +476,10 @@ def from_variables( return cls(var.data, dim, grid_info, index_kind=index_kind) def create_variables(self, variables): - return self._pd_index.create_variables(variables) + return self._index.create_variables(variables) - def _replace(self, new_pd_index: PandasIndex): - return type(self)(new_pd_index, self._dim, self._grid, index_kind=self._kind) + def _replace(self, new_index: xr.Index): + return type(self)(new_index, self._dim, self._grid, index_kind=self._kind) @property def grid_info(self) -> HealpixInfo: diff --git a/xdggs/index.py b/xdggs/index.py index a3d97b7f..6759df94 100644 --- a/xdggs/index.py +++ b/xdggs/index.py @@ -42,15 +42,15 @@ def decode(ds, grid_info=None, *, name="cell_ids", index_kind="pandas"): class DGGSIndex(Index): _dim: str - _pd_index: PandasIndex + _index: PandasIndex def __init__(self, cell_ids: Any | PandasIndex, dim: str, grid_info: DGGSInfo): self._dim = dim if isinstance(cell_ids, PandasIndex): - self._pd_index = cell_ids + self._index = cell_ids else: - self._pd_index = PandasIndex(cell_ids, dim) + self._index = PandasIndex(cell_ids, dim) self._grid = grid_info @@ -71,28 +71,28 @@ def from_variables( return cls.from_variables(variables, options=options) def values(self): - return self._pd_index.index.values + return self._index.index.values def create_variables( self, variables: Mapping[Any, xr.Variable] | None = None ) -> dict[Hashable, xr.Variable]: - return self._pd_index.create_variables(variables) + return self._index.create_variables(variables) def isel( self: "DGGSIndex", indexers: Mapping[Any, int | np.ndarray | xr.Variable] ) -> Union["DGGSIndex", None]: - new_pd_index = self._pd_index.isel(indexers) - if new_pd_index is not None: - return self._replace(new_pd_index) + new_index = self._index.isel(indexers) + if new_index is not None: + return self._replace(new_index) else: return None def sel(self, labels, method=None, tolerance=None): if method == "nearest": raise ValueError("finding nearest grid cell has no meaning") - return self._pd_index.sel(labels, method=method, tolerance=tolerance) + return self._index.sel(labels, method=method, tolerance=tolerance) - def _replace(self, new_pd_index: PandasIndex): + def _replace(self, new_index: PandasIndex): raise NotImplementedError() def cell_centers(self) -> tuple[np.ndarray, np.ndarray]: diff --git a/xdggs/tests/test_h3.py b/xdggs/tests/test_h3.py index e1c2629d..c58c0205 100644 --- a/xdggs/tests/test_h3.py +++ b/xdggs/tests/test_h3.py @@ -221,8 +221,8 @@ def test_init(cell_ids, dim, level): assert index._dim == dim # TODO: how do we check the index, if at all? - assert index._pd_index.dim == dim - assert np.all(index._pd_index.index.values == cell_ids) + assert index._index.dim == dim + assert np.all(index._index.index.values == cell_ids) @pytest.mark.parametrize("level", levels) @@ -247,8 +247,8 @@ def test_from_variables(variable_name, variable, options): assert (index._dim,) == variable.dims # TODO: how do we check the index, if at all? - assert (index._pd_index.dim,) == variable.dims - assert np.all(index._pd_index.index.values == variable.data) + assert (index._index.dim,) == variable.dims + assert np.all(index._index.index.values == variable.data) @pytest.mark.parametrize(["old_variable", "new_variable"], variable_combinations) @@ -267,7 +267,7 @@ def test_replace(old_variable, new_variable): assert new_index._grid == index._grid assert new_index._dim == index._dim - assert new_index._pd_index == new_pandas_index + assert new_index._index == new_pandas_index @pytest.mark.parametrize("max_width", [20, 50, 80, 120]) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index d88635a9..6f14e403 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -471,9 +471,9 @@ def test_init(self, cell_ids, dim, grid) -> None: assert index._grid == grid assert index._dim == dim - assert index._pd_index.dim == dim + assert index._index.dim == dim - np.testing.assert_equal(index._pd_index.index.values, cell_ids) + np.testing.assert_equal(index._index.index.values, cell_ids) @given(strategies.grids()) def test_grid(self, grid): @@ -497,7 +497,7 @@ def test_from_variables(variable_name, variable, options) -> None: assert index._grid.indexing_scheme == expected_scheme assert (index._dim,) == variable.dims - np.testing.assert_equal(index._pd_index.index.values, variable.data) + np.testing.assert_equal(index._index.index.values, variable.data) @pytest.mark.parametrize(["old_variable", "new_variable"], variable_combinations) @@ -517,7 +517,7 @@ def test_replace(old_variable, new_variable) -> None: new_index = index._replace(new_pandas_index) assert new_index._dim == index._dim - assert new_index._pd_index == new_pandas_index + assert new_index._index == new_pandas_index assert index._grid == grid From 177ab6c05fb3606b9690d81e9a9ff4679d598c97 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 12:14:56 +0200 Subject: [PATCH 27/60] check `isel` --- xdggs/tests/test_healpix.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 6f14e403..9a77edb7 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -581,3 +581,30 @@ def test_from_array(self, depth, cell_ids, max_computes): assert isinstance(index, healpix.HealpixMocIndex) assert index.size == cell_ids.size assert index.nbytes == 16 + + @pytest.mark.parametrize( + "indexer", (slice(None), slice(None, 4**4), slice(2 * 4**4, 7 * 4**4)) + ) + def test_isel(self, indexer): + from healpix_geo.nested import RangeMOCIndex + + grid_info = healpix.HealpixInfo(level=4, indexing_scheme="nested") + cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") + index = healpix.HealpixMocIndex( + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids), + dim="cells", + name="cell_ids", + grid_info=grid_info, + ) + + actual = index.isel({"cells": indexer}) + expected = healpix.HealpixMocIndex( + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids[indexer]), + dim="cells", + name="cell_ids", + grid_info=grid_info, + ) + + assert isinstance(actual, healpix.HealpixMocIndex) + assert actual.nbytes == expected.nbytes + np.testing.assert_equal(actual._index.cell_ids(), expected._index.cell_ids()) From 15d99c271dd4cf2076398c4b3436497c6a2b93ff Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 12:17:54 +0200 Subject: [PATCH 28/60] raise on irregular chunks --- xdggs/healpix.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 564d9923..f084fe50 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -401,6 +401,9 @@ def create_variables(self, variables): import dask import dask.array as da + if len(set(chunks)) > 2: + raise ValueError("irregular chunk sizes are not supported") + chunk_arrays = [ da.from_delayed( dask.delayed(extract_chunk)(self._index, slice_), From 13ebcccbcabb0d3157548ebf0dd73cd0aaf91352 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 12:26:45 +0200 Subject: [PATCH 29/60] check `create_variables` --- xdggs/tests/test_healpix.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 9a77edb7..35705823 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -608,3 +608,34 @@ def test_isel(self, indexer): assert isinstance(actual, healpix.HealpixMocIndex) assert actual.nbytes == expected.nbytes np.testing.assert_equal(actual._index.cell_ids(), expected._index.cell_ids()) + + @pytest.mark.parametrize("dask", [pytest.param(True, marks=requires_dask), False]) + def test_create_variables(self, dask): + from healpix_geo.nested import RangeMOCIndex + + grid_info = healpix.HealpixInfo(level=4, indexing_scheme="nested") + cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") + indexer = slice(3 * 4**grid_info.level, 7 * 4**grid_info.level) + index = healpix.HealpixMocIndex( + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids[indexer]), + dim="cells", + name="cell_ids", + grid_info=grid_info, + ) + + if dask: + variables = { + "cell_ids": xr.Variable("cells", cell_ids, grid_info.to_dict()).chunk( + {"cells": 4**2} + ) + } + else: + variables = { + "cell_ids": xr.Variable("cells", cell_ids, grid_info.to_dict()) + } + + actual = index.create_variables(variables) + expected = {"cell_ids": variables["cell_ids"].isel(cells=indexer)} + + assert actual.keys() == expected.keys() + xr.testing.assert_equal(actual["cell_ids"], expected["cell_ids"]) From 15de58ac7437a4dcbd202bdbeaf0492d584f6188 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 12:32:00 +0200 Subject: [PATCH 30/60] check that irregular chunks are rejected --- xdggs/tests/test_healpix.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 35705823..52b824ed 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -639,3 +639,26 @@ def test_create_variables(self, dask): assert actual.keys() == expected.keys() xr.testing.assert_equal(actual["cell_ids"], expected["cell_ids"]) + + @requires_dask + def test_create_variables_irregular_chunks(self): + from healpix_geo.nested import RangeMOCIndex + + grid_info = healpix.HealpixInfo(level=1, indexing_scheme="nested") + cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") + index = healpix.HealpixMocIndex( + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids), + dim="cells", + name="cell_ids", + grid_info=grid_info, + ) + chunks = (12, 13, 14, 9) + + variables = { + "cell_ids": xr.Variable("cells", cell_ids, grid_info.to_dict()).chunk( + {"cells": chunks} + ) + } + + with pytest.raises(ValueError, match="irregular chunk sizes.*"): + index.create_variables(variables) From 6df12bb7bcfc5c50a3d02477cf04916040b6e27e Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 13:59:38 +0200 Subject: [PATCH 31/60] more accurate condition --- xdggs/healpix.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index f084fe50..0bd091e2 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -401,7 +401,11 @@ def create_variables(self, variables): import dask import dask.array as da - if len(set(chunks)) > 2: + n_chunksizes = len(set(chunks)) + regular_chunks = n_chunksizes < 2 or ( + n_chunksizes == 2 and chunks[-1] < max(chunks) + ) + if not regular_chunks: raise ValueError("irregular chunk sizes are not supported") chunk_arrays = [ From cc4d9834f4d9c17e5c75dde37162e8e270c7be51 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 14:11:21 +0200 Subject: [PATCH 32/60] check more invalid chunks --- xdggs/tests/test_healpix.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 52b824ed..5d8c5bf3 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -641,7 +641,16 @@ def test_create_variables(self, dask): xr.testing.assert_equal(actual["cell_ids"], expected["cell_ids"]) @requires_dask - def test_create_variables_irregular_chunks(self): + @pytest.mark.parametrize( + "chunks", + ( + pytest.param((12, 13, 14, 9), id="irregular"), + pytest.param((13, 13, 9, 13), id="one_chunk_smaller"), + pytest.param((14, 10, 10, 14), id="multiple_chunks_smaller"), + pytest.param((18, 10, 10, 10), id="only_first_bigger"), + ), + ) + def test_create_variables_irregular_chunks(self, chunks): from healpix_geo.nested import RangeMOCIndex grid_info = healpix.HealpixInfo(level=1, indexing_scheme="nested") From c791c93d582e2e80a732ceae564d5dd7e8343a32 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 14:12:29 +0200 Subject: [PATCH 33/60] check more valid chunk sizes --- xdggs/tests/test_healpix.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 5d8c5bf3..457588d2 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -609,8 +609,15 @@ def test_isel(self, indexer): assert actual.nbytes == expected.nbytes np.testing.assert_equal(actual._index.cell_ids(), expected._index.cell_ids()) - @pytest.mark.parametrize("dask", [pytest.param(True, marks=requires_dask), False]) - def test_create_variables(self, dask): + @pytest.mark.parametrize( + "chunks", + [ + pytest.param((768, 768, 768, 768), marks=requires_dask), + pytest.param((780, 780, 780, 732), marks=requires_dask), + None, + ], + ) + def test_create_variables(self, chunks): from healpix_geo.nested import RangeMOCIndex grid_info = healpix.HealpixInfo(level=4, indexing_scheme="nested") @@ -623,10 +630,10 @@ def test_create_variables(self, dask): grid_info=grid_info, ) - if dask: + if chunks is not None: variables = { "cell_ids": xr.Variable("cells", cell_ids, grid_info.to_dict()).chunk( - {"cells": 4**2} + {"cells": chunks} ) } else: From ed9527c9aa31876472167e0b544f58b2040756e9 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 14:16:22 +0200 Subject: [PATCH 34/60] modify the chunksize if we reach the end --- xdggs/healpix.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 0bd091e2..0c276849 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -327,6 +327,9 @@ def construct_chunk_ranges(chunks, until): if start == stop: break + if until - start < chunksize: + chunksize = until - start + yield chunksize, slice(start, stop) start = stop From fc6cb5cdf3e5eed110041cbcfc3145131111c1b4 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 15:00:21 +0200 Subject: [PATCH 35/60] support irregular chunks by tracking them on the index --- xdggs/healpix.py | 73 ++++++++++++++++++++++++++++++------- xdggs/tests/test_healpix.py | 35 ++---------------- 2 files changed, 62 insertions(+), 46 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 0c276849..2ac3fa24 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -334,16 +334,51 @@ def construct_chunk_ranges(chunks, until): start = stop +def subset_chunks(chunks, indexer): + def _subset(offset, chunk, indexer): + if offset >= indexer.stop or offset + chunk < indexer.start: + # outside slice + return 0 + elif offset >= indexer.start and offset + chunk < indexer.stop: + # full chunk + return chunk + else: + # partial chunk + left_trim = offset - indexer.start + right_trim = offset + chunk - indexer.stop + + if left_trim < 0: + left_trim = 0 + + if right_trim < 0: + right_trim = 0 + + return chunk - left_trim - right_trim + + if chunks is None: + return None + + chunk_offsets = np.cumulative_sum(chunks, include_initial=True) + + trimmed_chunks = tuple( + _subset(offset, chunk, indexer) + for offset, chunk in zip(chunk_offsets[:-1], chunks) + ) + + return tuple(chunk for chunk in trimmed_chunks if chunk > 0) + + def extract_chunk(index, slice_): return index.isel(slice_).cell_ids() class HealpixMocIndex(xr.Index): - def __init__(self, index, *, dim, name, grid_info): + def __init__(self, index, *, dim, name, grid_info, chunksizes): self._index = index self._dim = dim self._grid_info = grid_info self._name = name + self._chunksizes = chunksizes @property def size(self): @@ -353,6 +388,10 @@ def size(self): def nbytes(self): return self._index.nbytes + @property + def chunksizes(self): + return self._chunksizes + @classmethod def from_array(cls, array, *, dim, name, grid_info): if grid_info.indexing_scheme != "nested": @@ -374,11 +413,22 @@ def from_array(cls, array, *, dim, name, grid_info): index = reduce(RangeMOCIndex.union, indexes) else: index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) - return cls(index, dim=dim, name=name, grid_info=grid_info) - def _replace(self, index): + chunksizes = {dim: getattr(array, "chunks", None)} + return cls( + index, dim=dim, name=name, grid_info=grid_info, chunksizes=chunksizes + ) + + def _replace(self, index, chunksizes=None): + if chunksizes is None: + chunksizes = self._chunksizes + return type(self)( - index, dim=self._dim, name=self._name, grid_info=self._grid_info + index, + dim=self._dim, + name=self._name, + grid_info=self._grid_info, + chunksizes=chunksizes, ) @classmethod @@ -394,23 +444,15 @@ def create_variables(self, variables): var = variables[name] attrs = var.attrs encoding = var.encoding - chunks = var.chunksizes.get(self._dim) else: attrs = None encoding = None - chunks = None + chunks = self._chunksizes[self._dim] if chunks is not None: import dask import dask.array as da - n_chunksizes = len(set(chunks)) - regular_chunks = n_chunksizes < 2 or ( - n_chunksizes == 2 and chunks[-1] < max(chunks) - ) - if not regular_chunks: - raise ValueError("irregular chunk sizes are not supported") - chunk_arrays = [ da.from_delayed( dask.delayed(extract_chunk)(self._index, slice_), @@ -434,8 +476,11 @@ def create_variables(self, variables): def isel(self, indexers): indexer = indexers[self._dim] + new_chunksizes = { + self._dim: subset_chunks(self._chunksizes[self._dim], indexer) + } - return self._replace(self._index.isel(indexer)) + return self._replace(self._index.isel(indexer), chunksizes=new_chunksizes) @register_dggs("healpix") diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 457588d2..faf80e1d 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -595,6 +595,7 @@ def test_isel(self, indexer): dim="cells", name="cell_ids", grid_info=grid_info, + chunksizes={"cells": None}, ) actual = index.isel({"cells": indexer}) @@ -603,6 +604,7 @@ def test_isel(self, indexer): dim="cells", name="cell_ids", grid_info=grid_info, + chunksizes={"cells": None}, ) assert isinstance(actual, healpix.HealpixMocIndex) @@ -628,6 +630,7 @@ def test_create_variables(self, chunks): dim="cells", name="cell_ids", grid_info=grid_info, + chunksizes={"cells": chunks}, ) if chunks is not None: @@ -646,35 +649,3 @@ def test_create_variables(self, chunks): assert actual.keys() == expected.keys() xr.testing.assert_equal(actual["cell_ids"], expected["cell_ids"]) - - @requires_dask - @pytest.mark.parametrize( - "chunks", - ( - pytest.param((12, 13, 14, 9), id="irregular"), - pytest.param((13, 13, 9, 13), id="one_chunk_smaller"), - pytest.param((14, 10, 10, 14), id="multiple_chunks_smaller"), - pytest.param((18, 10, 10, 10), id="only_first_bigger"), - ), - ) - def test_create_variables_irregular_chunks(self, chunks): - from healpix_geo.nested import RangeMOCIndex - - grid_info = healpix.HealpixInfo(level=1, indexing_scheme="nested") - cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") - index = healpix.HealpixMocIndex( - RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids), - dim="cells", - name="cell_ids", - grid_info=grid_info, - ) - chunks = (12, 13, 14, 9) - - variables = { - "cell_ids": xr.Variable("cells", cell_ids, grid_info.to_dict()).chunk( - {"cells": chunks} - ) - } - - with pytest.raises(ValueError, match="irregular chunk sizes.*"): - index.create_variables(variables) From 92fa1c6e8023f9e580210f2f09d96b1cec32d148 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 15:01:25 +0200 Subject: [PATCH 36/60] require `numpy>=2` --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index bb384f28..2b00bb88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "xarray", + "numpy>=2.0", "cdshealpix", "healpix-geo>=0.0.3", "h3ronpy", From f99c045241601dc86f9d76978f4fcb138bec8ce6 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 15:03:39 +0200 Subject: [PATCH 37/60] more (and smaller) tests --- xdggs/tests/test_healpix.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index faf80e1d..3a8ce9ea 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -614,15 +614,16 @@ def test_isel(self, indexer): @pytest.mark.parametrize( "chunks", [ - pytest.param((768, 768, 768, 768), marks=requires_dask), - pytest.param((780, 780, 780, 732), marks=requires_dask), + pytest.param((12, 12, 12, 12), marks=requires_dask), + pytest.param((18, 10, 10, 10), marks=requires_dask), + pytest.param((8, 12, 14, 14), marks=requires_dask), None, ], ) def test_create_variables(self, chunks): from healpix_geo.nested import RangeMOCIndex - grid_info = healpix.HealpixInfo(level=4, indexing_scheme="nested") + grid_info = healpix.HealpixInfo(level=1, indexing_scheme="nested") cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") indexer = slice(3 * 4**grid_info.level, 7 * 4**grid_info.level) index = healpix.HealpixMocIndex( From 259b927d034fbc2dc90e535806b198ddbbe7f818 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 15:56:44 +0200 Subject: [PATCH 38/60] restructure the `isel` tests --- xdggs/healpix.py | 4 +++- xdggs/tests/test_healpix.py | 32 ++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 2ac3fa24..a4e962c1 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -359,9 +359,11 @@ def _subset(offset, chunk, indexer): return None chunk_offsets = np.cumulative_sum(chunks, include_initial=True) + total_length = chunk_offsets[-1] + concrete_slice = slice(*indexer.indices(total_length)) trimmed_chunks = tuple( - _subset(offset, chunk, indexer) + _subset(offset, chunk, concrete_slice) for offset, chunk in zip(chunk_offsets[:-1], chunks) ) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 3a8ce9ea..f8057921 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -583,28 +583,44 @@ def test_from_array(self, depth, cell_ids, max_computes): assert index.nbytes == 16 @pytest.mark.parametrize( - "indexer", (slice(None), slice(None, 4**4), slice(2 * 4**4, 7 * 4**4)) + "indexer", + (slice(None), slice(None, 4**1), slice(2 * 4**1, 7 * 4**1), slice(7, 25)), ) - def test_isel(self, indexer): + @pytest.mark.parametrize( + ["chunks", "expected_chunks"], + [ + (None, None), + pytest.param((12, 12, 12, 12), (), marks=requires_dask), + ], + ) + def test_isel(self, indexer, chunks, expected_chunks): from healpix_geo.nested import RangeMOCIndex - grid_info = healpix.HealpixInfo(level=4, indexing_scheme="nested") - cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") + grid_info = healpix.HealpixInfo(level=1, indexing_scheme="nested") + if chunks is None: + cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") + cell_ids_ = cell_ids + else: + import dask.array as da + + cell_ids_ = np.arange(12 * 4**grid_info.level, dtype="uint64") + cell_ids = da.arange(12 * 4**grid_info.level, dtype="uint64", chunks=chunks) + index = healpix.HealpixMocIndex( - RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids), + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids_), dim="cells", name="cell_ids", grid_info=grid_info, - chunksizes={"cells": None}, + chunksizes={"cells": chunks}, ) actual = index.isel({"cells": indexer}) expected = healpix.HealpixMocIndex( - RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids[indexer]), + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids_[indexer]), dim="cells", name="cell_ids", grid_info=grid_info, - chunksizes={"cells": None}, + chunksizes={"cells": getattr(cell_ids[indexer], "chunks", None)}, ) assert isinstance(actual, healpix.HealpixMocIndex) From f26e426da67d332927584a212ca682bc56763891 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 16:14:11 +0200 Subject: [PATCH 39/60] improve the `isel` test --- xdggs/tests/test_healpix.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index f8057921..f31d573c 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -587,44 +587,49 @@ def test_from_array(self, depth, cell_ids, max_computes): (slice(None), slice(None, 4**1), slice(2 * 4**1, 7 * 4**1), slice(7, 25)), ) @pytest.mark.parametrize( - ["chunks", "expected_chunks"], + "chunks", [ - (None, None), - pytest.param((12, 12, 12, 12), (), marks=requires_dask), + pytest.param(None, id="none"), + pytest.param((12, 12, 12, 12), marks=requires_dask, id="equally_sized"), ], ) - def test_isel(self, indexer, chunks, expected_chunks): + def test_isel(self, indexer, chunks): from healpix_geo.nested import RangeMOCIndex grid_info = healpix.HealpixInfo(level=1, indexing_scheme="nested") + cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") if chunks is None: - cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") - cell_ids_ = cell_ids + input_chunks = None + expected_chunks = None else: import dask.array as da - cell_ids_ = np.arange(12 * 4**grid_info.level, dtype="uint64") - cell_ids = da.arange(12 * 4**grid_info.level, dtype="uint64", chunks=chunks) + cell_ids_ = da.arange( + 12 * 4**grid_info.level, dtype="uint64", chunks=chunks + ) + input_chunks = cell_ids_.chunks[0] + expected_chunks = cell_ids_[indexer].chunks[0] index = healpix.HealpixMocIndex( - RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids_), + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids), dim="cells", name="cell_ids", grid_info=grid_info, - chunksizes={"cells": chunks}, + chunksizes={"cells": input_chunks}, ) actual = index.isel({"cells": indexer}) expected = healpix.HealpixMocIndex( - RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids_[indexer]), + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids[indexer]), dim="cells", name="cell_ids", grid_info=grid_info, - chunksizes={"cells": getattr(cell_ids[indexer], "chunks", None)}, + chunksizes={"cells": expected_chunks}, ) assert isinstance(actual, healpix.HealpixMocIndex) assert actual.nbytes == expected.nbytes + assert actual.chunksizes == expected.chunksizes np.testing.assert_equal(actual._index.cell_ids(), expected._index.cell_ids()) @pytest.mark.parametrize( From a786344ae23175e855452c13e213b50201c6b759 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 16:14:40 +0200 Subject: [PATCH 40/60] fix two bugs in the chunk subsetting --- xdggs/healpix.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index a4e962c1..1b8dc280 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -344,7 +344,7 @@ def _subset(offset, chunk, indexer): return chunk else: # partial chunk - left_trim = offset - indexer.start + left_trim = indexer.start - offset right_trim = offset + chunk - indexer.stop if left_trim < 0: @@ -367,7 +367,7 @@ def _subset(offset, chunk, indexer): for offset, chunk in zip(chunk_offsets[:-1], chunks) ) - return tuple(chunk for chunk in trimmed_chunks if chunk > 0) + return tuple(int(chunk) for chunk in trimmed_chunks if chunk > 0) def extract_chunk(index, slice_): From 7bec771b536e32a054a30375d5faabcf6366c3f0 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 16:16:36 +0200 Subject: [PATCH 41/60] remove the default for the `chunksizes` parameter --- xdggs/healpix.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 1b8dc280..d4779218 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -421,10 +421,7 @@ def from_array(cls, array, *, dim, name, grid_info): index, dim=dim, name=name, grid_info=grid_info, chunksizes=chunksizes ) - def _replace(self, index, chunksizes=None): - if chunksizes is None: - chunksizes = self._chunksizes - + def _replace(self, index, chunksizes): return type(self)( index, dim=self._dim, From 826500bb364c45747b3eff8e4856bd070ef96bb4 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 17:13:25 +0200 Subject: [PATCH 42/60] =?UTF-8?q?depth=20=E2=86=92=20level?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xdggs/tests/test_healpix.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index f31d573c..5c8770b5 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -536,7 +536,7 @@ def test_repr_inline(level, max_width) -> None: class TestHealpixMocIndex: @pytest.mark.parametrize( - ["depth", "cell_ids", "max_computes"], + ["level", "cell_ids", "max_computes"], ( pytest.param( 2, np.arange(12 * 4**2, dtype="uint64"), 1, id="numpy-2-full_domain" @@ -570,8 +570,8 @@ class TestHealpixMocIndex: ), ), ) - def test_from_array(self, depth, cell_ids, max_computes): - grid_info = healpix.HealpixInfo(level=depth, indexing_scheme="nested") + def test_from_array(self, level, cell_ids, max_computes): + grid_info = healpix.HealpixInfo(level=level, indexing_scheme="nested") with raise_if_dask_computes(max_computes=max_computes): index = healpix.HealpixMocIndex.from_array( From 545557e9f2f80c5c68f51c61acc7364482fcae61 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 17:15:41 +0200 Subject: [PATCH 43/60] tests for `from_variables` --- xdggs/tests/test_healpix.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 5c8770b5..1af84c94 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -582,6 +582,36 @@ def test_from_array(self, level, cell_ids, max_computes): assert index.size == cell_ids.size assert index.nbytes == 16 + @pytest.mark.parametrize("dask", [False, pytest.param(True, marks=requires_dask)]) + @pytest.mark.parametrize( + ["level", "cell_ids"], + ( + ( + 1, + np.array( + [0, 1, 2, 3, 4, 5, 6, 7, 8, 22, 23, 24, 25, 43, 45, 46, 47], + dtype="uint64", + ), + ), + (4, np.arange(12 * 4**4, dtype="uint64")), + ), + ) + def test_from_variables(self, level, cell_ids, dask): + grid_info_mapping = { + "grid_name": "healpix", + "level": level, + "indexing_scheme": "nested", + } + variables = {"cell_ids": xr.Variable("cells", cell_ids, grid_info_mapping)} + if dask: + variables["cell_ids"] = variables["cell_ids"].chunk(4**level) + + actual = healpix.HealpixMocIndex.from_variables(variables, options={}) + + assert isinstance(actual, healpix.HealpixMocIndex) + assert actual.size == cell_ids.size + np.testing.assert_equal(actual._index.cell_ids(), cell_ids) + @pytest.mark.parametrize( "indexer", (slice(None), slice(None, 4**1), slice(2 * 4**1, 7 * 4**1), slice(7, 25)), From 82b4580a09f8be05615f12fe47609dbb3b58090c Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 17:24:44 +0200 Subject: [PATCH 44/60] more tests for edge cases --- xdggs/tests/test_healpix.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 1af84c94..7ae11ddb 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -582,6 +582,16 @@ def test_from_array(self, level, cell_ids, max_computes): assert index.size == cell_ids.size assert index.nbytes == 16 + def test_from_array_unsupported_indexing_scheme(self): + level = 1 + cell_ids = np.arange(12 * 4**level, dtype="uint64") + grid_info = healpix.HealpixInfo(level=level, indexing_scheme="ring") + + with pytest.raises(ValueError, match=".*only supports the 'nested' scheme"): + healpix.HealpixMocIndex.from_array( + cell_ids, dim="cells", name="cell_ids", grid_info=grid_info + ) + @pytest.mark.parametrize("dask", [False, pytest.param(True, marks=requires_dask)]) @pytest.mark.parametrize( ["level", "cell_ids"], @@ -701,3 +711,22 @@ def test_create_variables(self, chunks): assert actual.keys() == expected.keys() xr.testing.assert_equal(actual["cell_ids"], expected["cell_ids"]) + + def test_create_variables_new(self): + from healpix_geo.nested import RangeMOCIndex + + grid_info = healpix.HealpixInfo(level=1, indexing_scheme="nested") + cell_ids = np.arange(12 * 4**grid_info.level, dtype="uint64") + indexer = slice(3 * 4**grid_info.level, 7 * 4**grid_info.level) + index = healpix.HealpixMocIndex( + RangeMOCIndex.from_cell_ids(grid_info.level, cell_ids[indexer]), + dim="cells", + name="cell_ids", + grid_info=grid_info, + chunksizes={"cells": None}, + ) + actual = index.create_variables({}) + expected = {"cell_ids": xr.Variable("cells", cell_ids[indexer])} + + assert actual.keys() == expected.keys() + xr.testing.assert_equal(actual["cell_ids"], expected["cell_ids"]) From f88d55b3249d011fde2bf3f0f7b64b87193165b5 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Thu, 12 Jun 2025 17:28:59 +0200 Subject: [PATCH 45/60] check that the index is created properly --- xdggs/tests/test_healpix.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 7ae11ddb..c71a8518 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -500,6 +500,19 @@ def test_from_variables(variable_name, variable, options) -> None: np.testing.assert_equal(index._index.index.values, variable.data) +def test_from_variables_moc() -> None: + level = 2 + grid_info = {"grid_name": "healpix", "level": level, "indexing_scheme": "nested"} + variables = {"cell_ids": xr.Variable("cells", np.arange(12 * 4**level), grid_info)} + + index = healpix.HealpixIndex.from_variables( + variables, options={"index_kind": "moc"} + ) + + assert isinstance(index._index, healpix.HealpixMocIndex) + assert index.grid_info.to_dict() == grid_info + + @pytest.mark.parametrize(["old_variable", "new_variable"], variable_combinations) def test_replace(old_variable, new_variable) -> None: grid = healpix.HealpixInfo.from_dict(old_variable.attrs) From ba2a9258f241932643224b08eee8dca8bf4f2d39 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 13 Jun 2025 13:41:58 +0200 Subject: [PATCH 46/60] check the extracted chunks --- xdggs/tests/test_healpix.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index c71a8518..7dc7655a 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -592,6 +592,8 @@ def test_from_array(self, level, cell_ids, max_computes): ) assert isinstance(index, healpix.HealpixMocIndex) + chunks = index.chunksizes["cells"] + assert chunks is None or isinstance(chunks[0], int) assert index.size == cell_ids.size assert index.nbytes == 16 From 3a28ed09bd6d7d132fbbf66b86bcdf36ce43aa61 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 13 Jun 2025 13:42:45 +0200 Subject: [PATCH 47/60] explicitly raise an error for non-1D cell id coords --- xdggs/healpix.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index d4779218..ca2d1e03 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -401,6 +401,9 @@ def from_array(cls, array, *, dim, name, grid_info): "The MOC index currently only supports the 'nested' scheme" ) + if array.ndim != 1: + raise ValueError("only 1D cell ids are supported") + if array.size == 12 * 4**grid_info.level: index = RangeMOCIndex.full_domain(grid_info.level) elif isinstance(array, dask_array_type): From cf12c82500b9ee7bb92ec0fbc8789334b4eac4f6 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 13 Jun 2025 13:43:26 +0200 Subject: [PATCH 48/60] properly extract the chunks --- xdggs/healpix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index ca2d1e03..e924fa32 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -419,7 +419,7 @@ def from_array(cls, array, *, dim, name, grid_info): else: index = RangeMOCIndex.from_cell_ids(grid_info.level, array.astype("uint64")) - chunksizes = {dim: getattr(array, "chunks", None)} + chunksizes = {dim: array.chunks[0] if hasattr(array, "chunks") else None} return cls( index, dim=dim, name=name, grid_info=grid_info, chunksizes=chunksizes ) From 1b4c8ebce7c2502e3b74c47c25ae1e12ca66089d Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 13 Jun 2025 14:42:48 +0200 Subject: [PATCH 49/60] also replace the typing to `xr.Index` --- xdggs/index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xdggs/index.py b/xdggs/index.py index 6759df94..b0ce174b 100644 --- a/xdggs/index.py +++ b/xdggs/index.py @@ -42,12 +42,12 @@ def decode(ds, grid_info=None, *, name="cell_ids", index_kind="pandas"): class DGGSIndex(Index): _dim: str - _index: PandasIndex + _index: xr.Index - def __init__(self, cell_ids: Any | PandasIndex, dim: str, grid_info: DGGSInfo): + def __init__(self, cell_ids: Any | xr.Index, dim: str, grid_info: DGGSInfo): self._dim = dim - if isinstance(cell_ids, PandasIndex): + if isinstance(cell_ids, xr.Index): self._index = cell_ids else: self._index = PandasIndex(cell_ids, dim) From 92c484f224d2d594a714bb324b2f41f6c47fca02 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 13 Jun 2025 21:19:39 +0200 Subject: [PATCH 50/60] accept index options instead of specific args in `decode` --- xdggs/accessor.py | 12 +++++++----- xdggs/index.py | 14 +++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/xdggs/accessor.py b/xdggs/accessor.py index fd05fefe..7f92f01c 100644 --- a/xdggs/accessor.py +++ b/xdggs/accessor.py @@ -30,7 +30,7 @@ def __init__(self, obj: xr.Dataset | xr.DataArray): self._index = index def decode( - self, grid_info=None, *, name="cell_ids", index_kind="pandas" + self, grid_info=None, *, name="cell_ids", index_options=None, **index_kwargs ) -> xr.Dataset | xr.DataArray: """decode the DGGS cell ids @@ -41,9 +41,8 @@ def decode( the dataset. name : str, default: "cell_ids" The name of the coordinate containing the cell ids. - index_kind : str, default: "pandas" - The kind of index to use. This is implementation-dependent, with all - implementations supporting at least in-memory indexes (``"pandas"``). + index_options, **index_kwargs : dict, optional + Additional options to forward to the index. Returns ------- @@ -56,8 +55,11 @@ def decode( if isinstance(grid_info, dict): var.attrs = grid_info + if index_options is None: + index_options = {} + return self._obj.drop_indexes(name, errors="ignore").set_xindex( - name, DGGSIndex, index_kind=index_kind + name, DGGSIndex, **(index_options | index_kwargs) ) @property diff --git a/xdggs/index.py b/xdggs/index.py index b0ce174b..def181ff 100644 --- a/xdggs/index.py +++ b/xdggs/index.py @@ -9,7 +9,7 @@ from xdggs.utils import GRID_REGISTRY, _extract_cell_id_variable -def decode(ds, grid_info=None, *, name="cell_ids", index_kind="pandas"): +def decode(ds, grid_info=None, *, name="cell_ids", index_options=None, **index_kwargs): """ decode grid parameters and create a DGGS index @@ -23,9 +23,8 @@ def decode(ds, grid_info=None, *, name="cell_ids", index_kind="pandas"): the dataset. name : str, default: "cell_ids" The name of the coordinate containing the cell ids. - index_kind : str, default: "pandas" - The kind of index to use. This is implementation-dependent, with all - implementations supporting at least in-memory indexes (``"pandas"``). + index_options, **index_kwargs : dict, optional + Additional options to forward to the index. Returns ------- @@ -37,7 +36,12 @@ def decode(ds, grid_info=None, *, name="cell_ids", index_kind="pandas"): xarray.Dataset.dggs.decode xarray.DataArray.dggs.decode """ - return ds.dggs.decode(name=name, grid_info=grid_info, index_kind=index_kind) + if index_options is None: + index_options = {} + + return ds.dggs.decode( + name=name, grid_info=grid_info, index_options=index_options | index_kwargs + ) class DGGSIndex(Index): From c8660465d062e768d9396a0c4a1de19729151698 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 13 Jun 2025 21:23:21 +0200 Subject: [PATCH 51/60] remove support for the `index_kind` kwarg for H3 --- xdggs/h3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xdggs/h3.py b/xdggs/h3.py index d0831093..ec9a3dbc 100644 --- a/xdggs/h3.py +++ b/xdggs/h3.py @@ -222,7 +222,6 @@ def from_variables( ) -> "H3Index": _, var, dim = _extract_cell_id_variable(variables) - options.pop("index_kind", None) grid_info = H3Info.from_dict(var.attrs | options) return cls(var.data, dim, grid_info) From 512c905685ab9606531f4035813a83783f1baa67 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 13 Jun 2025 21:27:01 +0200 Subject: [PATCH 52/60] mark the `raise_if_dask_computes` as vendored from `xarray` --- xdggs/tests/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xdggs/tests/__init__.py b/xdggs/tests/__init__.py index 683f18e4..1d767c0b 100644 --- a/xdggs/tests/__init__.py +++ b/xdggs/tests/__init__.py @@ -26,6 +26,7 @@ def arange(*args, **kwargs): has_dask = False +# vendored from xarray class CountingScheduler: """Simple dask scheduler counting the number of computes. @@ -51,6 +52,7 @@ def geoarrow_to_shapely(arr): return shapely.from_wkb(ga.as_wkb(arr)) +# vendored from xarray def raise_if_dask_computes(max_computes=0): # return a dummy context manager so that this can be used for non-dask objects if not has_dask: From 6de6c33672ba792c81dcfbe34790d4605158c731 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 16 Jun 2025 16:20:01 +0200 Subject: [PATCH 53/60] docstrings for the moc index wrapper --- xdggs/healpix.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index e924fa32..a9ae65f2 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -375,6 +375,13 @@ def extract_chunk(index, slice_): class HealpixMocIndex(xr.Index): + """More efficient index for healpix cell ids based on a MOC + + This uses the rust `moc crate `_ to represent + cell ids as a set of disconnected ranges at level 29, vastly reducing the + memory footprint and computation time of set-like operations. + """ + def __init__(self, index, *, dim, name, grid_info, chunksizes): self._index = index self._dim = dim @@ -384,18 +391,47 @@ def __init__(self, index, *, dim, name, grid_info, chunksizes): @property def size(self): + """The number of indexed cells.""" return self._index.size @property def nbytes(self): + """The number of bytes occupied by the index. + + .. note:: + This does not take any (constant) overhead into account. + """ return self._index.nbytes @property def chunksizes(self): + """The size of the chunks of the indexed coordinate.""" return self._chunksizes @classmethod def from_array(cls, array, *, dim, name, grid_info): + """Construct an index from a raw array. + + Parameters + ---------- + array : array-like + The array of cell ids as uint64. If the size is equal to the total + number of cells at the given refinement level, creates a full domain + index without looking at the cell ids. If a chunked array, it will + create indexes for each chunk and then merge the chunk indexes + in-memory. + dim : hashable + The dimension of the index. + name : hashable + The name of the indexed coordinate. + grid_info : xdggs.HealpixInfo + The grid parameters. + + Returns + ------- + index : HealpixMocIndex + The resulting index. + """ if grid_info.indexing_scheme != "nested": raise ValueError( "The MOC index currently only supports the 'nested' scheme" @@ -435,12 +471,39 @@ def _replace(self, index, chunksizes): @classmethod def from_variables(cls, variables, *, options): + """Create a new index object from the cell id coordinate variable + + Parameters + ---------- + variables : dict-like + Mapping of :py:class:`Variable` objects holding the coordinate labels + to index. + options : dict-like + Mapping of arbitrary options to pass to the HealpixInfo object. + + Returns + ------- + index : Index + A new Index object. + """ name, var, dim = _extract_cell_id_variable(variables) grid_info = HealpixInfo.from_dict(var.attrs | options) return cls.from_array(var.data, dim=dim, name=name, grid_info=grid_info) def create_variables(self, variables): + """Create new coordinate variables from this index + + Parameters + ---------- + variables : dict-like, optional + Mapping of :py:class:`Variable` objects. + + Returns + ------- + index_variables : dict-like + Dictionary of :py:class:`Variable` objects. + """ name = self._name if variables is not None and name in variables: var = variables[name] @@ -477,6 +540,21 @@ def create_variables(self, variables): return {name: var} def isel(self, indexers): + """Subset the index using positional indexers. + + Parameters + ---------- + indexers : dict-like + A dictionary of positional indexers as passed from + :py:meth:`Dataset.isel` and where the entries have been filtered for + the current index. Note that the underlying index currently only + supports slices. + + Returns + ------- + maybe_index : Index + A new Index object or ``None``. + """ indexer = indexers[self._dim] new_chunksizes = { self._dim: subset_chunks(self._chunksizes[self._dim], indexer) From 387a9be6d8c7b7d1bb754fdeb5abfc03fffe6e4c Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 16 Jun 2025 16:30:22 +0200 Subject: [PATCH 54/60] point towards `healpix_geo.nested.RangeMOCIndex` --- docs/conf.py | 1 + xdggs/healpix.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 8cc092e9..867ed5a3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -115,6 +115,7 @@ "healpy": ("https://healpy.readthedocs.io/en/latest", None), "cdshealpix-python": ("https://cds-astro.github.io/cds-healpix-python", None), "shapely": ("https://shapely.readthedocs.io/en/stable", None), + "healpix-geo": ("https://healpix-geo.readthedocs.io/en/latest", None), } # -- myst-nb options --------------------------------------------------------- diff --git a/xdggs/healpix.py b/xdggs/healpix.py index a9ae65f2..e564121a 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -380,6 +380,10 @@ class HealpixMocIndex(xr.Index): This uses the rust `moc crate `_ to represent cell ids as a set of disconnected ranges at level 29, vastly reducing the memory footprint and computation time of set-like operations. + + See Also + -------- + healpix_geo.nested.RangeMOCIndex """ def __init__(self, index, *, dim, name, grid_info, chunksizes): From 8a809b92e87af12392c7368de7237571abe9be9d Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 16 Jun 2025 16:30:42 +0200 Subject: [PATCH 55/60] point out this only works for `nested` --- xdggs/healpix.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index e564121a..09148e90 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -381,6 +381,10 @@ class HealpixMocIndex(xr.Index): cell ids as a set of disconnected ranges at level 29, vastly reducing the memory footprint and computation time of set-like operations. + .. warning:: + + Only supported for the ``nested`` scheme. + See Also -------- healpix_geo.nested.RangeMOCIndex From 45842e5bd824b6c1ebaa104c54feab7a51aae26e Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 16 Jun 2025 17:34:26 +0200 Subject: [PATCH 56/60] more comments on the role of the different index objects --- xdggs/healpix.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 09148e90..6e0390a3 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -374,6 +374,7 @@ def extract_chunk(index, slice_): return index.isel(slice_).cell_ids() +# optionally replaces the PandasIndex within HealpixIndex class HealpixMocIndex(xr.Index): """More efficient index for healpix cell ids based on a MOC @@ -388,6 +389,7 @@ class HealpixMocIndex(xr.Index): See Also -------- healpix_geo.nested.RangeMOCIndex + The low-level implementation of the index functionality. """ def __init__(self, index, *, dim, name, grid_info, chunksizes): From 0b86fad167688ef9d704b62ebe30c143bbd862bc Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 30 Jun 2025 14:11:09 +0200 Subject: [PATCH 57/60] figure out chunk sizes after indexing --- xdggs/healpix.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 6e0390a3..a5117f7c 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -335,7 +335,7 @@ def construct_chunk_ranges(chunks, until): def subset_chunks(chunks, indexer): - def _subset(offset, chunk, indexer): + def _subset_slice(offset, chunk, indexer): if offset >= indexer.stop or offset + chunk < indexer.start: # outside slice return 0 @@ -355,6 +355,17 @@ def _subset(offset, chunk, indexer): return chunk - left_trim - right_trim + def _subset_array(offset, chunk, indexer): + mask = (indexer >= offset) & (indexer < offset + chunk) + + return np.sum(mask.asdtype(int)) + + def _subset(offset, chunk, indexer): + if isinstance(indexer, slice): + return _subset_slice(offset, chunk, indexer) + else: + return _subset_array(offset, chunk, indexer) + if chunks is None: return None From 79ca5ecd4a0c4faf6e6ed59d9776cad1e831aab5 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 30 Jun 2025 14:17:06 +0200 Subject: [PATCH 58/60] make sure we only pass uint64 arrays to the low-level index --- xdggs/healpix.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index a5117f7c..37f780dd 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -577,6 +577,16 @@ def isel(self, indexers): A new Index object or ``None``. """ indexer = indexers[self._dim] + if isinstance(indexer, np.ndarray): + if np.isdtype(indexer.dtype, "signed integer"): + indexer = np.where(indexer >= 0, indexer, self.size + indexer).astype( + "uint64" + ) + elif np.isdtype(indexer, "unsigned integer"): + indexer = indexer.astype("uint64") + else: + raise ValueError("can only index with integer arrays or slices") + new_chunksizes = { self._dim: subset_chunks(self._chunksizes[self._dim], indexer) } From 923b9d564cc639ac8e5014531ebda8b225fbe970 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 30 Jun 2025 14:21:18 +0200 Subject: [PATCH 59/60] check that the array indexer works --- xdggs/tests/test_healpix.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/xdggs/tests/test_healpix.py b/xdggs/tests/test_healpix.py index 7dc7655a..5e31366b 100644 --- a/xdggs/tests/test_healpix.py +++ b/xdggs/tests/test_healpix.py @@ -639,7 +639,15 @@ def test_from_variables(self, level, cell_ids, dask): @pytest.mark.parametrize( "indexer", - (slice(None), slice(None, 4**1), slice(2 * 4**1, 7 * 4**1), slice(7, 25)), + ( + slice(None), + slice(None, 4**1), + slice(2 * 4**1, 7 * 4**1), + slice(7, 25), + np.array([-4, -3, -2], dtype="int64"), + np.array([12, 13, 14, 15, 16], dtype="uint64"), + np.array([1, 2, 3, 4, 5], dtype="uint32"), + ), ) @pytest.mark.parametrize( "chunks", From 5415f521b9cfd2e82703f3d011c828cdc289aa86 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Mon, 30 Jun 2025 14:22:11 +0200 Subject: [PATCH 60/60] fix bugs introduced into `isel` --- xdggs/healpix.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xdggs/healpix.py b/xdggs/healpix.py index 37f780dd..2244f165 100644 --- a/xdggs/healpix.py +++ b/xdggs/healpix.py @@ -358,7 +358,7 @@ def _subset_slice(offset, chunk, indexer): def _subset_array(offset, chunk, indexer): mask = (indexer >= offset) & (indexer < offset + chunk) - return np.sum(mask.asdtype(int)) + return np.sum(mask.astype(int)) def _subset(offset, chunk, indexer): if isinstance(indexer, slice): @@ -371,10 +371,12 @@ def _subset(offset, chunk, indexer): chunk_offsets = np.cumulative_sum(chunks, include_initial=True) total_length = chunk_offsets[-1] - concrete_slice = slice(*indexer.indices(total_length)) + + if isinstance(indexer, slice): + indexer = slice(*indexer.indices(total_length)) trimmed_chunks = tuple( - _subset(offset, chunk, concrete_slice) + _subset(offset, chunk, indexer) for offset, chunk in zip(chunk_offsets[:-1], chunks) ) @@ -582,7 +584,7 @@ def isel(self, indexers): indexer = np.where(indexer >= 0, indexer, self.size + indexer).astype( "uint64" ) - elif np.isdtype(indexer, "unsigned integer"): + elif np.isdtype(indexer.dtype, "unsigned integer"): indexer = indexer.astype("uint64") else: raise ValueError("can only index with integer arrays or slices")