From 81572641bd5638a5eb7aa8215eaac14151ef52b0 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Oct 2022 11:33:44 +0200 Subject: [PATCH 1/3] add MultiPandasIndex --- xarray/indexes/__init__.py | 3 +- xarray/indexes/multipandasindex.py | 170 +++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 xarray/indexes/multipandasindex.py diff --git a/xarray/indexes/__init__.py b/xarray/indexes/__init__.py index 41321c9a0ff..cd7973cd969 100644 --- a/xarray/indexes/__init__.py +++ b/xarray/indexes/__init__.py @@ -3,5 +3,6 @@ """ from ..core.indexes import Index, PandasIndex, PandasMultiIndex +from .multipandasindex import MultiPandasIndex -__all__ = ["Index", "PandasIndex", "PandasMultiIndex"] +__all__ = ["Index", "MultiPandasIndex", "PandasIndex", "PandasMultiIndex"] diff --git a/xarray/indexes/multipandasindex.py b/xarray/indexes/multipandasindex.py new file mode 100644 index 00000000000..d1fa9d9930d --- /dev/null +++ b/xarray/indexes/multipandasindex.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +from typing import Any, Hashable, Mapping, TypeVar + +import numpy as np + +from ..core.indexes import Index, IndexVars, PandasIndex +from ..core.indexing import IndexSelResult, merge_sel_results +from ..core.types import JoinOptions +from ..core.utils import Frozen +from ..core.variable import Variable + +T_MultiPandasIndex = TypeVar("T_MultiPandasIndex", bound="MultiPandasIndex") + + +class MultiPandasIndex(Index): + """Helper class to implement meta-indexes encapsulating + one or more (single) pandas indexes. + + Each pandas index must relate to a separate dimension. + + This class shoudn't be instantiated directly. + + """ + + indexes: Frozen[Hashable, PandasIndex] + dims: Frozen[Hashable, int] + + __slots__ = ("indexes", "dims") + + def __init__(self, indexes: Mapping[Hashable, PandasIndex]): + dims = {idx.dim: idx.index.size for idx in indexes.values()} + + seen = set() + dup_dims = [d for d in dims if d in seen or seen.add(d)] + if dup_dims: + raise ValueError( + f"cannot create a {self.__class__.__name__} from coordinates " + f"sharing common dimension(s): {dup_dims}" + ) + + self.indexes = Frozen(indexes) + self.dims = Frozen(dims) + + @classmethod + def from_variables( + cls: type[T_MultiPandasIndex], variables: Mapping[Any, Variable], options + ) -> T_MultiPandasIndex: + indexes = { + k: PandasIndex.from_variables({k: v}, options={}) + for k, v in variables.items() + } + + return cls(indexes) + + def create_variables( + self, variables: Mapping[Any, Variable] | None = None + ) -> IndexVars: + + idx_variables = {} + + for idx in self.indexes.values(): + idx_variables.update(idx.create_variables(variables)) + + return idx_variables + + def isel( + self: T_MultiPandasIndex, + indexers: Mapping[Any, int | slice | np.ndarray | Variable], + ) -> T_MultiPandasIndex | PandasIndex | None: + new_indexes = {} + + for k, idx in self.indexes.items(): + if k in indexers: + new_idx = idx.isel({k: indexers[k]}) + if new_idx is not None: + new_indexes[k] = new_idx + else: + new_indexes[k] = idx + + # + # How should we deal with dropped index(es) (scalar selection)? + # - drop the whole index? + # - always return a MultiPandasIndex with remaining index(es)? + # - return either a MultiPandasIndex or a PandasIndex? + # + + if not len(new_indexes): + return None + elif len(new_indexes) == 1: + return next(iter(new_indexes.values())) + else: + return type(self)(new_indexes) + + def sel(self, labels: dict[Any, Any], **kwargs) -> IndexSelResult: + results: list[IndexSelResult] = [] + + for k, idx in self.indexes.items(): + if k in labels: + results.append(idx.sel({k: labels[k]}, **kwargs)) + + return merge_sel_results(results) + + def _get_unmatched_names( + self: T_MultiPandasIndex, other: T_MultiPandasIndex + ) -> set: + return set(self.indexes).symmetric_difference(other.indexes) + + def equals(self: T_MultiPandasIndex, other: T_MultiPandasIndex) -> bool: + # We probably don't need to check for matching coordinate names + # as this is already done during alignment when finding matching indexes. + # This may change in the future, though. + # see https://github.com/pydata/xarray/issues/7002 + if self._get_unmatched_names(other): + return False + else: + return all( + [idx.equals(other.indexes[k]) for k, idx in self.indexes.items()] + ) + + def join( + self: T_MultiPandasIndex, other: T_MultiPandasIndex, how: JoinOptions = "inner" + ) -> T_MultiPandasIndex: + new_indexes = {} + + for k, idx in self.indexes.items(): + new_indexes[k] = idx.join(other.indexes[k], how=how) + + return type(self)(new_indexes) + + def reindex_like( + self: T_MultiPandasIndex, other: T_MultiPandasIndex + ) -> dict[Hashable, Any]: + dim_indexers = {} + + for k, idx in self.indexes.items(): + dim_indexers.update(idx.reindex_like(other.indexes[k])) + + return dim_indexers + + def roll(self: T_MultiPandasIndex, shifts: Mapping[Any, int]) -> T_MultiPandasIndex: + new_indexes = {} + + for k, idx in self.indexes.items(): + if k in shifts: + new_indexes[k] = idx.roll({k: shifts[k]}) + else: + new_indexes[k] = idx + + return type(self)(new_indexes) + + def rename( + self: T_MultiPandasIndex, + name_dict: Mapping[Any, Hashable], + dims_dict: Mapping[Any, Hashable], + ) -> T_MultiPandasIndex: + new_indexes = {} + + for k, idx in self.indexes.items(): + new_indexes[k] = idx.rename(name_dict, dims_dict) + + return type(self)(new_indexes) + + def copy(self: T_MultiPandasIndex, deep: bool = True) -> T_MultiPandasIndex: + new_indexes = {} + + for k, idx in self.indexes.items(): + new_indexes[k] = idx.copy(deep=deep) + + return type(self)(new_indexes) From e4d753c3bf3ffdc30864510885c68fdb2e8349a2 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 18 Oct 2022 15:02:49 +0200 Subject: [PATCH 2/3] fix mypy errors --- xarray/indexes/multipandasindex.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/indexes/multipandasindex.py b/xarray/indexes/multipandasindex.py index d1fa9d9930d..2de77c160a2 100644 --- a/xarray/indexes/multipandasindex.py +++ b/xarray/indexes/multipandasindex.py @@ -6,7 +6,6 @@ from ..core.indexes import Index, IndexVars, PandasIndex from ..core.indexing import IndexSelResult, merge_sel_results -from ..core.types import JoinOptions from ..core.utils import Frozen from ..core.variable import Variable @@ -32,7 +31,13 @@ def __init__(self, indexes: Mapping[Hashable, PandasIndex]): dims = {idx.dim: idx.index.size for idx in indexes.values()} seen = set() - dup_dims = [d for d in dims if d in seen or seen.add(d)] + dup_dims = [] + for d in dims: + if d in seen: + dup_dims.append(d) + else: + seen.add(d) + if dup_dims: raise ValueError( f"cannot create a {self.__class__.__name__} from coordinates " @@ -119,7 +124,7 @@ def equals(self: T_MultiPandasIndex, other: T_MultiPandasIndex) -> bool: ) def join( - self: T_MultiPandasIndex, other: T_MultiPandasIndex, how: JoinOptions = "inner" + self: T_MultiPandasIndex, other: T_MultiPandasIndex, how: str = "inner" ) -> T_MultiPandasIndex: new_indexes = {} From f421596e991f4979f791b56792cd1c9fd13158f5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 12 Jul 2025 22:24:25 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/indexes/__init__.py | 4 ++-- xarray/indexes/multipandasindex.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xarray/indexes/__init__.py b/xarray/indexes/__init__.py index 624a016392e..7914a005db6 100644 --- a/xarray/indexes/__init__.py +++ b/xarray/indexes/__init__.py @@ -2,7 +2,6 @@ DataArray objects. """ -from .multipandasindex import MultiPandasIndex from xarray.core.coordinate_transform import CoordinateTransform from xarray.core.indexes import ( @@ -11,6 +10,7 @@ PandasIndex, PandasMultiIndex, ) +from xarray.indexes.multipandasindex import MultiPandasIndex from xarray.indexes.nd_point_index import NDPointIndex, TreeAdapter from xarray.indexes.range_index import RangeIndex @@ -18,10 +18,10 @@ "CoordinateTransform", "CoordinateTransformIndex", "Index", + "MultiPandasIndex", "NDPointIndex", "PandasIndex", "PandasMultiIndex", "RangeIndex", "TreeAdapter", - "MultiPandasIndex", ] diff --git a/xarray/indexes/multipandasindex.py b/xarray/indexes/multipandasindex.py index 2de77c160a2..04f3092b071 100644 --- a/xarray/indexes/multipandasindex.py +++ b/xarray/indexes/multipandasindex.py @@ -1,13 +1,14 @@ from __future__ import annotations -from typing import Any, Hashable, Mapping, TypeVar +from collections.abc import Hashable, Mapping +from typing import Any, TypeVar import numpy as np -from ..core.indexes import Index, IndexVars, PandasIndex -from ..core.indexing import IndexSelResult, merge_sel_results -from ..core.utils import Frozen -from ..core.variable import Variable +from xarray.core.indexes import Index, IndexVars, PandasIndex +from xarray.core.indexing import IndexSelResult, merge_sel_results +from xarray.core.utils import Frozen +from xarray.core.variable import Variable T_MultiPandasIndex = TypeVar("T_MultiPandasIndex", bound="MultiPandasIndex") @@ -18,14 +19,14 @@ class MultiPandasIndex(Index): Each pandas index must relate to a separate dimension. - This class shoudn't be instantiated directly. + This class shouldn't be instantiated directly. """ indexes: Frozen[Hashable, PandasIndex] dims: Frozen[Hashable, int] - __slots__ = ("indexes", "dims") + __slots__ = ("dims", "indexes") def __init__(self, indexes: Mapping[Hashable, PandasIndex]): dims = {idx.dim: idx.index.size for idx in indexes.values()} @@ -61,7 +62,6 @@ def from_variables( def create_variables( self, variables: Mapping[Any, Variable] | None = None ) -> IndexVars: - idx_variables = {} for idx in self.indexes.values(): @@ -90,7 +90,7 @@ def isel( # - return either a MultiPandasIndex or a PandasIndex? # - if not len(new_indexes): + if not new_indexes: return None elif len(new_indexes) == 1: return next(iter(new_indexes.values()))