-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Check if zarr store supports consolidated metadata #10457
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 23 commits
01e7518
83e553b
e44326d
4e4eeb0
d858059
d377780
3132f6a
900eef5
4c4462f
5b9b749
fadb953
57d9d23
11170fc
0b8fa41
f769f85
4eef318
29242a4
5d15bbd
b4ed8ee
6a23bfb
d87b209
08cb041
9cc9105
c817869
50cff3e
a47b2e2
76f16fe
9b39529
e174d9c
5b441b2
e395c60
1829807
de02ced
8167b83
1e30664
101b280
26a402c
1ce9d87
ad4d79c
09915dd
8252f9f
e550851
470e701
586c7a2
e673608
758eea4
e3f0d7b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -115,7 +115,9 @@ | |
import zarr.codecs | ||
|
||
if has_zarr_v3: | ||
from zarr.abc.store import Store | ||
from zarr.storage import MemoryStore as KVStore | ||
from zarr.storage import WrapperStore | ||
|
||
ZARR_FORMATS = [2, 3] | ||
else: | ||
|
@@ -1062,7 +1064,7 @@ | |
encoded = Dataset({"x": ("t", sb, attributes)}) | ||
unsigned_dtype = np.dtype(f"u{sb.dtype.itemsize}") | ||
|
||
with _roundtrip_with_warnings(decoded) as actual: | ||
Check failure on line 1067 in xarray/tests/test_backends.py
|
||
for k in decoded.variables: | ||
assert decoded.variables[k].dtype == actual.variables[k].dtype | ||
exp_fv = decoded.variables[k].encoding["_FillValue"] | ||
|
@@ -1138,7 +1140,7 @@ | |
|
||
def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self) -> None: | ||
original = self._create_cf_dataset() | ||
with self.roundtrip(original, open_kwargs={"decode_coords": False}) as ds: | ||
Check failure on line 1143 in xarray/tests/test_backends.py
|
||
assert ds.coords["latitude"].attrs["bounds"] == "latitude_bnds" | ||
assert ds.coords["longitude"].attrs["bounds"] == "longitude_bnds" | ||
assert "coordinates" not in ds["variable"].attrs | ||
|
@@ -1146,7 +1148,7 @@ | |
|
||
def test_coordinate_variables_after_dataset_roundtrip(self) -> None: | ||
original = self._create_cf_dataset() | ||
with self.roundtrip(original, open_kwargs={"decode_coords": "all"}) as actual: | ||
assert_identical(actual, original) | ||
|
||
with self.roundtrip(original) as actual: | ||
|
@@ -1171,7 +1173,7 @@ | |
# xarray/tests/test_conventions.py::TestCFEncodedDataStore | ||
# needs the to_dataset. The other backends should be fine | ||
# without it. | ||
with pytest.warns( | ||
Check failure on line 1176 in xarray/tests/test_backends.py
|
||
UserWarning, | ||
match=( | ||
r"Variable\(s\) referenced in bounds not in variables: " | ||
|
@@ -1203,7 +1205,7 @@ | |
{"temp": ("x", [0, 1]), "precip": ("x", [0, -1])}, | ||
{"lat": ("x", [2, 3]), "lon": ("x", [4, 5])}, | ||
) | ||
with self.roundtrip(original) as actual: | ||
assert_identical(actual, original) | ||
with self.roundtrip(original, open_kwargs=dict(decode_coords=False)) as ds: | ||
assert equals_latlon(ds["temp"].attrs["coordinates"]) | ||
|
@@ -1241,7 +1243,7 @@ | |
} | ||
) | ||
|
||
with self.roundtrip(ds) as actual: | ||
# technically these datasets are slightly different, | ||
# one hold mixed endian data (ds) the other should be | ||
# all big endian (actual). assertDatasetIdentical | ||
|
@@ -1273,7 +1275,7 @@ | |
ds = Dataset({"x": ("y", np.arange(10.0))}) | ||
|
||
kwargs: dict[str, Any] = dict(encoding={"x": {"dtype": "f4"}}) | ||
with self.roundtrip(ds, save_kwargs=kwargs) as actual: | ||
encoded_dtype = actual.x.encoding["dtype"] | ||
# On OS X, dtype sometimes switches endianness for unclear reasons | ||
assert encoded_dtype.kind == "f" and encoded_dtype.itemsize == 4 | ||
|
@@ -1298,7 +1300,7 @@ | |
ds = Dataset({"t": pd.date_range("2000-01-01", periods=3)}) | ||
units = "days since 1900-01-01" | ||
kwargs = dict(encoding={"t": {"units": units}}) | ||
with self.roundtrip(ds, save_kwargs=kwargs) as actual: | ||
assert actual.t.encoding["units"] == units | ||
assert_identical(actual, ds) | ||
|
||
|
@@ -1351,20 +1353,20 @@ | |
def test_explicitly_omit_fill_value_in_coord(self) -> None: | ||
ds = Dataset({"x": ("y", [np.pi, -np.pi])}, coords={"y": [0.0, 1.0]}) | ||
ds.y.encoding["_FillValue"] = None | ||
with self.roundtrip(ds) as actual: | ||
assert "_FillValue" not in actual.y.encoding | ||
|
||
def test_explicitly_omit_fill_value_in_coord_via_encoding_kwarg(self) -> None: | ||
ds = Dataset({"x": ("y", [np.pi, -np.pi])}, coords={"y": [0.0, 1.0]}) | ||
kwargs = dict(encoding={"y": {"_FillValue": None}}) | ||
with self.roundtrip(ds, save_kwargs=kwargs) as actual: | ||
Check failure on line 1362 in xarray/tests/test_backends.py
|
||
assert "_FillValue" not in actual.y.encoding | ||
assert ds.y.encoding == {} | ||
|
||
def test_encoding_same_dtype(self) -> None: | ||
ds = Dataset({"x": ("y", np.arange(10.0, dtype="f4"))}) | ||
kwargs = dict(encoding={"x": {"dtype": "f4"}}) | ||
with self.roundtrip(ds, save_kwargs=kwargs) as actual: | ||
encoded_dtype = actual.x.encoding["dtype"] | ||
# On OS X, dtype sometimes switches endianness for unclear reasons | ||
assert encoded_dtype.kind == "f" and encoded_dtype.itemsize == 4 | ||
|
@@ -1380,7 +1382,7 @@ | |
# regression for GH1215 | ||
data = create_test_data() | ||
with create_tmp_file(allow_cleanup_failure=False) as tmp_file: | ||
self.save(data, tmp_file, mode="w") | ||
data["var2"][:] = -999 | ||
data["var9"] = data["var2"] * 3 | ||
self.save(data[["var2", "var9"]], tmp_file, mode="a") | ||
|
@@ -1403,7 +1405,7 @@ | |
x=["y", "z"] | ||
) | ||
with pytest.raises(NotImplementedError, match=r"MultiIndex"): | ||
with self.roundtrip(ds): | ||
pass | ||
|
||
# regression GH8628 (can serialize reset multi-index level coordinates) | ||
|
@@ -1423,7 +1425,7 @@ | |
) | ||
} | ||
).chunk() | ||
with pytest.warns(SerializationWarning, match="dask array with dtype=object"): | ||
Check failure on line 1428 in xarray/tests/test_backends.py
|
||
with self.roundtrip(original) as actual: | ||
assert_identical(original, actual) | ||
|
||
|
@@ -1441,7 +1443,7 @@ | |
# regression test for GH8909 | ||
ds = xr.Dataset() | ||
ds["A"] = xr.DataArray([[1, "a"], [2, "b"]], dims=["x", "y"]) | ||
with self.roundtrip(ds) as ds2: | ||
Check failure on line 1446 in xarray/tests/test_backends.py
|
||
expected = ds2.sel(indexer) | ||
with self.roundtrip(expected) as actual: | ||
assert_identical(actual, expected) | ||
|
@@ -2375,7 +2377,7 @@ | |
@pytest.mark.parametrize("consolidated", [False, True, None]) | ||
def test_roundtrip_consolidated(self, consolidated) -> None: | ||
expected = create_test_data() | ||
with self.roundtrip( | ||
Check failure on line 2380 in xarray/tests/test_backends.py
|
||
expected, | ||
save_kwargs={"consolidated": consolidated}, | ||
open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, | ||
|
@@ -2386,7 +2388,7 @@ | |
def test_read_non_consolidated_warning(self) -> None: | ||
expected = create_test_data() | ||
with self.create_zarr_target() as store: | ||
self.save( | ||
Check failure on line 2391 in xarray/tests/test_backends.py
|
||
expected, store_target=store, consolidated=False, **self.version_kwargs | ||
) | ||
with pytest.warns( | ||
|
@@ -2400,7 +2402,7 @@ | |
with pytest.raises( | ||
FileNotFoundError, match="(No such file or directory|Unable to find group)" | ||
): | ||
xr.open_zarr(f"{uuid.uuid4()}") | ||
Check failure on line 2405 in xarray/tests/test_backends.py
|
||
|
||
@pytest.mark.skipif(has_zarr_v3, reason="chunk_store not implemented in zarr v3") | ||
def test_with_chunkstore(self) -> None: | ||
|
@@ -2421,7 +2423,7 @@ | |
def test_auto_chunk(self) -> None: | ||
original = create_test_data().chunk() | ||
|
||
with self.roundtrip(original, open_kwargs={"chunks": None}) as actual: | ||
Check failure on line 2426 in xarray/tests/test_backends.py
|
||
for k, v in actual.variables.items(): | ||
# only index variables should be in memory | ||
assert v._in_memory == (k in actual.dims) | ||
|
@@ -2442,7 +2444,7 @@ | |
|
||
# Using chunks = None should return non-chunked arrays | ||
open_kwargs: dict[str, Any] = {"chunks": None} | ||
with self.roundtrip(original, open_kwargs=open_kwargs) as actual: | ||
Check failure on line 2447 in xarray/tests/test_backends.py
|
||
for k, v in actual.variables.items(): | ||
# only index variables should be in memory | ||
assert v._in_memory == (k in actual.dims) | ||
|
@@ -2486,7 +2488,7 @@ | |
bad_chunks = (2, {"dim2": (3, 3, 2, 1)}) | ||
for chunks in bad_chunks: | ||
kwargs = {"chunks": chunks} | ||
with pytest.warns(UserWarning): | ||
Check failure on line 2491 in xarray/tests/test_backends.py
|
||
with self.roundtrip(original, open_kwargs=kwargs) as actual: | ||
for k, v in actual.variables.items(): | ||
# only index variables should be in memory | ||
|
@@ -2511,7 +2513,7 @@ | |
def test_deprecate_auto_chunk(self) -> None: | ||
original = create_test_data().chunk() | ||
with pytest.raises(TypeError): | ||
with self.roundtrip(original, open_kwargs={"auto_chunk": True}) as actual: | ||
Check failure on line 2516 in xarray/tests/test_backends.py
|
||
for k, v in actual.variables.items(): | ||
# only index variables should be in memory | ||
assert v._in_memory == (k in actual.dims) | ||
|
@@ -2530,7 +2532,7 @@ | |
def test_write_uneven_dask_chunks(self) -> None: | ||
# regression for GH#2225 | ||
original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3}) | ||
with self.roundtrip(original, open_kwargs={"chunks": {}}) as actual: | ||
Check failure on line 2535 in xarray/tests/test_backends.py
|
||
for k, v in actual.data_vars.items(): | ||
assert v.chunks == actual[k].chunks | ||
|
||
|
@@ -3744,6 +3746,29 @@ | |
assert actual["var1"].encoding["chunks"] == (2, 2) | ||
|
||
|
||
if has_zarr_v3:
    # Only defined when zarr-python v3 is installed: the base classes
    # ``WrapperStore`` and ``Store`` are imported under the ``has_zarr_v3``
    # guard, so an unconditional module-level definition would raise
    # NameError at import time on zarr v2.
    class NoConsolidatedMetadataSupportStore(WrapperStore[Store]):
        """
        Store that explicitly does not support consolidated metadata.

        Useful as a proxy for stores like Icechunk, see
        https://github.com/zarr-developers/zarr-python/pull/3119.
        """

        # NOTE(review): this flag is only honored by sufficiently new
        # zarr-python v3 releases (reportedly > 3.0.8) — confirm the exact
        # minimum version before relying on it.
        supports_consolidated_metadata = False
|
||
|
||
@requires_zarr
class TestZarrNoConsolidatedMetadataSupport(ZarrBase):
    """Run the Zarr backend test suite against a store that does not
    support consolidated metadata."""

    @contextlib.contextmanager
    def create_zarr_target(self):
        # Guard clause: the wrapper store only exists under zarr v3.
        if not has_zarr_v3:
            pytest.skip("requires zarr v3")
        # NOTE(review): zarr-python reportedly must be > 3.0.8 for the
        # supports_consolidated_metadata property to take effect — confirm.
        backing = zarr.storage.MemoryStore({}, read_only=False)
        yield NoConsolidatedMetadataSupportStore(backing)
|
||
|
||
@requires_zarr | ||
@pytest.mark.skipif( | ||
ON_WINDOWS, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this will be needed for Zarr >3.0.0,<3.1.0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right - I had something like that then removed it 🤦
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added in a47b2e2