From abce7dd4c5951fa83569ef019da34be102aff1ba Mon Sep 17 00:00:00 2001 From: DHRUVA KUMAR KAUSHAL Date: Tue, 8 Jul 2025 00:41:54 +0530 Subject: [PATCH 1/4] logic update --- doc/whats-new.rst | 6 ++++++ xarray/core/dataset.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e8b602e9dc9..66e1a664023 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,12 @@ Deprecations Bug fixes ~~~~~~~~~ +- :py:meth:`Dataset.set_xindex` now raises a helpful error when a custom index + creates extra variables that don't match the provided coordinate names, instead + of silently ignoring them. The error message suggests using the factory method + pattern with :py:meth:`xarray.Coordinates.from_xindex` and + :py:meth:`Dataset.assign_coords` for advanced use cases (:issue:`10499`). + By `Dhruva Kumar Kaushal `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6de626a159b..ac4bfc32df5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4940,6 +4940,20 @@ def set_xindex( if isinstance(index, PandasMultiIndex): coord_names = [index.dim] + list(coord_names) + # Check for extra variables that don't match the coordinate names + extra_vars = set(new_coord_vars) - set(coord_names) + if extra_vars: + extra_vars_str = ", ".join(f"'{name}'" for name in extra_vars) + coord_names_str = ", ".join(f"'{name}'" for name in coord_names) + raise ValueError( + f"The index created extra variables {extra_vars_str} that are not " + f"in the list of coordinates {coord_names_str}. " + f"Use a factory method pattern instead:\n" + f" index = {index_cls.__name__}.from_variables(ds, {list(coord_names)!r})\n" + f" coords = xr.Coordinates.from_xindex(index)\n" + f" ds = ds.assign_coords(coords)" + ) + variables: dict[Hashable, Variable] indexes: dict[Hashable, Index] From de22fee3b405ca32ff45205ea1b6c5e9c91fb8f2 Mon Sep 17 00:00:00 2001 From: DHRUVA KUMAR KAUSHAL Date: Tue, 8 Jul 2025 23:52:24 +0530 Subject: [PATCH 2/4] adding tests --- xarray/tests/test_indexes.py | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index 2b7900d9c89..186cd0eeb96 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -729,3 +729,60 @@ def test_restore_dtype_on_multiindexes(dtype: str) -> None: foo = xr.Dataset(coords={"bar": ("bar", np.array([0, 1], dtype=dtype))}) foo = foo.stack(baz=("bar",)) assert str(foo["bar"].values.dtype) == dtype + + +class IndexWithExtraVariables(Index): + @classmethod + def from_variables(cls, variables, *, options=None): + return cls() + + def create_variables(self, variables=None): + if variables is None: + return {} + + result = dict(variables) + if "time" in variables: + result["valid_time"] = Variable( + dims=("time",), + data=variables["time"].data + 1, + attrs={"description": "time + 1"}, + ) + return result + + +def test_set_xindex_with_extra_variables() -> None: + """Test that set_xindex raises an error when custom index creates extra variables.""" + import xarray as xr + + ds = xr.Dataset(coords={"time": [1, 2, 3]}).reset_index("time") + + # Test that set_xindex raises error for extra variables + with pytest.raises(ValueError, match="extra variables") as excinfo: + ds.set_xindex("time", IndexWithExtraVariables) + + error_msg = str(excinfo.value) + + assert "extra variables" in error_msg + assert "'valid_time'" in error_msg + assert "factory method pattern" in error_msg + assert "from_variables" in error_msg + assert "Coordinates.from_xindex" in error_msg + assert "assign_coords" in error_msg + + +def test_set_xindex_factory_method_pattern() -> None: + import xarray as xr + + ds = xr.Dataset(coords={"time": [1, 2, 3]}).reset_index("time") + + # For the factory pattern, we need to create the variables directly + # since Coordinates.from_xindex() calls create_variables() without arguments + coord_vars = {"time": ds._variables["time"]} + index = IndexWithExtraVariables.from_variables(coord_vars) + new_vars = index.create_variables(coord_vars) + + result = ds.assign_coords(new_vars) + + assert "time" in result.variables + assert "valid_time" in result.variables + assert_array_equal(result.valid_time.data, [2, 3, 4]) # time + 1 From 2e6c137b529a887efb70bbf101ed1c59a200c66a Mon Sep 17 00:00:00 2001 From: DHRUVA KUMAR KAUSHAL Date: Wed, 9 Jul 2025 15:45:17 +0530 Subject: [PATCH 3/4] resolving comments --- xarray/tests/test_indexes.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index 186cd0eeb96..feb9a77dafe 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -738,7 +738,15 @@ def from_variables(cls, variables, *, options=None): def create_variables(self, variables=None): if variables is None: - return {} + # For Coordinates.from_xindex(), return all variables the index can create + return { + "time": Variable(dims=("time",), data=[1, 2, 3]), + "valid_time": Variable( + dims=("time",), + data=[2, 3, 4], # time + 1 + attrs={"description": "time + 1"}, + ), + } result = dict(variables) if "time" in variables: @@ -752,37 +760,24 @@ def create_variables(self, variables=None): def test_set_xindex_with_extra_variables() -> None: """Test that set_xindex raises an error when custom index creates extra variables.""" - import xarray as xr ds = xr.Dataset(coords={"time": [1, 2, 3]}).reset_index("time") # Test that set_xindex raises error for extra variables - with pytest.raises(ValueError, match="extra variables") as excinfo: + with pytest.raises(ValueError, match="extra variables 'valid_time'"): ds.set_xindex("time", IndexWithExtraVariables) - error_msg = str(excinfo.value) - - assert "extra variables" in error_msg - assert "'valid_time'" in error_msg - assert "factory method pattern" in error_msg - assert "from_variables" in error_msg - assert "Coordinates.from_xindex" in error_msg - assert "assign_coords" in error_msg - def test_set_xindex_factory_method_pattern() -> None: - import xarray as xr ds = xr.Dataset(coords={"time": [1, 2, 3]}).reset_index("time") - # For the factory pattern, we need to create the variables directly - # since Coordinates.from_xindex() calls create_variables() without arguments + # Test the recommended factory method pattern coord_vars = {"time": ds._variables["time"]} index = IndexWithExtraVariables.from_variables(coord_vars) - new_vars = index.create_variables(coord_vars) - - result = ds.assign_coords(new_vars) + coords = xr.Coordinates.from_xindex(index) + result = ds.assign_coords(coords) assert "time" in result.variables assert "valid_time" in result.variables - assert_array_equal(result.valid_time.data, [2, 3, 4]) # time + 1 + assert_array_equal(result.valid_time.data, result.time.data + 1) From 7debc63479331ee6c438e1eea25092acd9e79cdf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 10:18:38 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_indexes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index feb9a77dafe..9f2eea48260 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -769,7 +769,6 @@ def test_set_xindex_with_extra_variables() -> None: def test_set_xindex_factory_method_pattern() -> None: - ds = xr.Dataset(coords={"time": [1, 2, 3]}).reset_index("time") # Test the recommended factory method pattern