Skip to content

Commit 5bf2cf4

Browse files
authored
Optimize writes to existing Zarr stores. (#8875)
* Optimize writes to existing Zarr stores.

  We need to read existing variables to make sure we append or write to a region with the right encoding. Currently we request all arrays in a Zarr group. Instead, only request those arrays for which we require encoding information.
* Add test
* Fix test
1 parent 2120808 commit 5bf2cf4

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

xarray/backends/zarr.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -623,7 +623,12 @@ def store(
623623
# avoid needing to load index variables into memory.
624624
# TODO: consider making loading indexes lazy again?
625625
existing_vars, _, _ = conventions.decode_cf_variables(
626-
self.get_variables(), self.get_attrs()
626+
{
627+
k: v
628+
for k, v in self.get_variables().items()
629+
if k in existing_variable_names
630+
},
631+
self.get_attrs(),
627632
)
628633
# Modified variables must use the same encoding as the store.
629634
vars_with_encoding = {}

xarray/tests/test_backends.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2261,7 +2261,6 @@ def test_write_uneven_dask_chunks(self) -> None:
22612261
original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3})
22622262
with self.roundtrip(original, open_kwargs={"chunks": {}}) as actual:
22632263
for k, v in actual.data_vars.items():
2264-
print(k)
22652264
assert v.chunks == actual[k].chunks
22662265

22672266
def test_chunk_encoding(self) -> None:
@@ -2468,6 +2467,27 @@ def test_group(self) -> None:
24682467
) as actual:
24692468
assert_identical(original, actual)
24702469

2470+
def test_zarr_mode_w_overwrites_encoding(self) -> None:
2471+
import zarr
2472+
2473+
data = Dataset({"foo": ("x", [1.0, 1.0, 1.0])})
2474+
with self.create_zarr_target() as store:
2475+
data.to_zarr(
2476+
store, **self.version_kwargs, encoding={"foo": {"add_offset": 1}}
2477+
)
2478+
np.testing.assert_equal(
2479+
zarr.open_group(store, **self.version_kwargs)["foo"], data.foo.data - 1
2480+
)
2481+
data.to_zarr(
2482+
store,
2483+
**self.version_kwargs,
2484+
encoding={"foo": {"add_offset": 0}},
2485+
mode="w",
2486+
)
2487+
np.testing.assert_equal(
2488+
zarr.open_group(store, **self.version_kwargs)["foo"], data.foo.data
2489+
)
2490+
24712491
def test_encoding_kwarg_fixed_width_string(self) -> None:
24722492
# not relevant for zarr, since we don't use EncodedStringCoder
24732493
pass

0 commit comments

Comments
 (0)