Skip to content

Commit ca99d5a

Browse files
Test fsspec roundtrip (#42)
* move kerchunk backend imports to be specific to each backend filetype * test roundtrip to json file then reading using fsspec * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add test env dependencies * more test env deps * more * add pip install of xarray PR * correct pip url * roundtrip test involving concatenation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove duplication of pooch * correct formatting * try removing netcdf4-python from the environment --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f9ca667 commit ca99d5a

File tree

2 files changed

+63
-3
lines changed

2 files changed

+63
-3
lines changed

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ dependencies = [
2828
"numpy",
2929
"ujson",
3030
"packaging",
31-
"universal-pathlib"
32-
31+
"universal-pathlib",
3332
]
3433

3534
[project.optional-dependencies]
@@ -39,8 +38,9 @@ test = [
3938
"pytest-mypy",
4039
"pytest-cov",
4140
"pytest",
42-
"scipy",
41+
"fsspec",
4342
"pooch",
43+
"scipy",
4444
"ruff",
4545
"fastparquet",
4646
"s3fs"

virtualizarr/tests/test_integration.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,70 @@
1+
import fsspec
12
import pytest
23
import xarray as xr
34
import xarray.testing as xrt
45

56
from virtualizarr import open_virtual_dataset
67

78

9+
def test_kerchunk_roundtrip_no_concat(tmpdir):
    """Round-trip a single netCDF file through kerchunk JSON references.

    Writes a tutorial dataset to netCDF, virtualizes it, serializes the
    references as kerchunk JSON, then reads them back through fsspec's
    reference filesystem and checks the result matches the original.
    """
    netcdf_path = f"{tmpdir}/air.nc"
    refs_path = f"{tmpdir}/refs.json"

    # Example dataset, saved to a temporary netCDF file on disk.
    expected = xr.tutorial.open_dataset("air_temperature", decode_times=False)
    expected.to_netcdf(netcdf_path)

    # Read the file back as virtual references via open_virtual_dataset.
    vds = open_virtual_dataset(netcdf_path, indexes={})

    # Serialize those references to disk as kerchunk JSON.
    vds.virtualize.to_kerchunk(refs_path, format="json")

    # Open the references through fsspec's reference filesystem / zarr store.
    mapper = fsspec.filesystem("reference", fo=refs_path).get_mapper("")
    actual = xr.open_dataset(mapper, engine="kerchunk")

    # The round-tripped dataset must equal the original.
    xrt.assert_equal(actual, expected)
30+
31+
32+
def test_kerchunk_roundtrip_concat(tmpdir):
    """Round-trip two virtually-concatenated netCDF files through kerchunk JSON.

    Splits a tutorial dataset in two along ``time``, writes each half to its
    own netCDF file, concatenates the virtual datasets along ``time``, then
    reads the serialized references back via fsspec and checks they reproduce
    the original dataset.
    """
    # set up example xarray dataset
    ds = xr.tutorial.open_dataset("air_temperature", decode_times=False).isel(
        time=slice(None, 2000)
    )

    # split into two datasets
    ds1, ds2 = ds.isel(time=slice(None, 1000)), ds.isel(time=slice(1000, None))

    # save them to disk as netCDF (in temporary directory)
    ds1.to_netcdf(f"{tmpdir}/air1.nc")
    ds2.to_netcdf(f"{tmpdir}/air2.nc")

    # use open_virtual_dataset to read each file as references
    vds1 = open_virtual_dataset(f"{tmpdir}/air1.nc", indexes={})
    vds2 = open_virtual_dataset(f"{tmpdir}/air2.nc", indexes={})

    # concatenate virtually along time; coords/compat options avoid loading
    # or comparing the underlying chunk data
    vds = xr.concat([vds1, vds2], dim="time", coords="minimal", compat="override")

    # write those references to disk as kerchunk json
    vds.virtualize.to_kerchunk(f"{tmpdir}/refs.json", format="json")

    # use fsspec to read the dataset from disk via the zarr store
    fs = fsspec.filesystem("reference", fo=f"{tmpdir}/refs.json")
    m = fs.get_mapper("")

    roundtrip = xr.open_dataset(m, engine="kerchunk")

    # assert equal to original dataset
    xrt.assert_equal(roundtrip, ds)
66+
67+
868
def test_open_scalar_variable(tmpdir):
969
# regression test for GH issue #100
1070

0 commit comments

Comments
 (0)