Skip to content

Commit cf36559

Browse files
authored
Don't allow overwriting indexes with region writes (#8877)
* Don't allow overwriting indexes with region writes Closes #8589 * Fix typing * one more typing fix
1 parent 473b87f commit cf36559

File tree

3 files changed

+27
-30
lines changed

3 files changed

+27
-30
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ New Features
4141
Breaking changes
4242
~~~~~~~~~~~~~~~~
4343

44+
- Don't allow overwriting index variables with ``to_zarr`` region writes. (:issue:`8589`, :pull:`8877`).
45+
By `Deepak Cherian <https://github.com/dcherian>`_.
4446

4547
Deprecations
4648
~~~~~~~~~~~~

xarray/backends/api.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,24 +1562,19 @@ def _auto_detect_regions(ds, region, open_kwargs):
15621562
return region
15631563

15641564

1565-
def _validate_and_autodetect_region(
1566-
ds, region, mode, open_kwargs
1567-
) -> tuple[dict[str, slice], bool]:
1565+
def _validate_and_autodetect_region(ds, region, mode, open_kwargs) -> dict[str, slice]:
15681566
if region == "auto":
15691567
region = {dim: "auto" for dim in ds.dims}
15701568

15711569
if not isinstance(region, dict):
15721570
raise TypeError(f"``region`` must be a dict, got {type(region)}")
15731571

15741572
if any(v == "auto" for v in region.values()):
1575-
region_was_autodetected = True
15761573
if mode != "r+":
15771574
raise ValueError(
15781575
f"``mode`` must be 'r+' when using ``region='auto'``, got {mode}"
15791576
)
15801577
region = _auto_detect_regions(ds, region, open_kwargs)
1581-
else:
1582-
region_was_autodetected = False
15831578

15841579
for k, v in region.items():
15851580
if k not in ds.dims:
@@ -1612,7 +1607,7 @@ def _validate_and_autodetect_region(
16121607
f".drop_vars({non_matching_vars!r})"
16131608
)
16141609

1615-
return region, region_was_autodetected
1610+
return region
16161611

16171612

16181613
def _validate_datatypes_for_zarr_append(zstore, dataset):
@@ -1784,12 +1779,9 @@ def to_zarr(
17841779
storage_options=storage_options,
17851780
zarr_version=zarr_version,
17861781
)
1787-
region, region_was_autodetected = _validate_and_autodetect_region(
1788-
dataset, region, mode, open_kwargs
1789-
)
1790-
# drop indices to avoid potential race condition with auto region
1791-
if region_was_autodetected:
1792-
dataset = dataset.drop_vars(dataset.indexes)
1782+
region = _validate_and_autodetect_region(dataset, region, mode, open_kwargs)
1783+
# can't modify indexes with region writes
1784+
dataset = dataset.drop_vars(dataset.indexes)
17931785
if append_dim is not None and append_dim in region:
17941786
raise ValueError(
17951787
f"cannot list the same dimension in both ``append_dim`` and "

xarray/tests/test_backends.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
import tempfile
1414
import uuid
1515
import warnings
16-
from collections.abc import Generator, Iterator
16+
from collections.abc import Generator, Iterator, Mapping
1717
from contextlib import ExitStack
1818
from io import BytesIO
1919
from os import listdir
2020
from pathlib import Path
21-
from typing import TYPE_CHECKING, Any, Final, cast
21+
from typing import TYPE_CHECKING, Any, Final, Literal, cast
2222
from unittest.mock import patch
2323

2424
import numpy as np
@@ -5651,24 +5651,27 @@ def test_zarr_region_index_write(self, tmp_path):
56515651
}
56525652
)
56535653

5654-
ds_region = 1 + ds.isel(x=slice(2, 4), y=slice(6, 8))
5654+
region_slice = dict(x=slice(2, 4), y=slice(6, 8))
5655+
ds_region = 1 + ds.isel(region_slice)
56555656

56565657
ds.to_zarr(tmp_path / "test.zarr")
56575658

5658-
with patch.object(
5659-
ZarrStore,
5660-
"set_variables",
5661-
side_effect=ZarrStore.set_variables,
5662-
autospec=True,
5663-
) as mock:
5664-
ds_region.to_zarr(tmp_path / "test.zarr", region="auto", mode="r+")
5665-
5666-
# should write the data vars but never the index vars with auto mode
5667-
for call in mock.call_args_list:
5668-
written_variables = call.args[1].keys()
5669-
assert "test" in written_variables
5670-
assert "x" not in written_variables
5671-
assert "y" not in written_variables
5659+
region: Mapping[str, slice] | Literal["auto"]
5660+
for region in [region_slice, "auto"]: # type: ignore
5661+
with patch.object(
5662+
ZarrStore,
5663+
"set_variables",
5664+
side_effect=ZarrStore.set_variables,
5665+
autospec=True,
5666+
) as mock:
5667+
ds_region.to_zarr(tmp_path / "test.zarr", region=region, mode="r+")
5668+
5669+
# should write the data vars but never the index vars, for both explicit and auto regions
5670+
for call in mock.call_args_list:
5671+
written_variables = call.args[1].keys()
5672+
assert "test" in written_variables
5673+
assert "x" not in written_variables
5674+
assert "y" not in written_variables
56725675

56735676
def test_zarr_region_append(self, tmp_path):
56745677
x = np.arange(0, 50, 10)

0 commit comments

Comments
 (0)