|
| 1 | +import fsspec |
1 | 2 | import pytest
|
2 | 3 | import xarray as xr
|
3 | 4 | import xarray.testing as xrt
|
4 | 5 |
|
5 | 6 | from virtualizarr import open_virtual_dataset
|
6 | 7 |
|
7 | 8 |
|
def test_kerchunk_roundtrip_no_concat(tmpdir):
    """Round-trip a single netCDF file through kerchunk JSON references.

    The ``air_temperature`` tutorial dataset is written to ``tmpdir`` as
    netCDF, opened with ``open_virtual_dataset`` (passing ``indexes={}``),
    serialized with ``to_kerchunk(..., format="json")``, and read back through
    an fsspec ``"reference"`` filesystem. The result must compare equal to
    the dataset that was originally written.
    """
    netcdf_path = f"{tmpdir}/air.nc"
    refs_path = f"{tmpdir}/refs.json"

    # the tutorial dataset is the ground truth; persist it as netCDF
    expected = xr.tutorial.open_dataset("air_temperature", decode_times=False)
    expected.to_netcdf(netcdf_path)

    # open the netCDF file with open_virtual_dataset and write its
    # references to disk as kerchunk json
    virtual = open_virtual_dataset(netcdf_path, indexes={})
    virtual.virtualize.to_kerchunk(refs_path, format="json")

    # read the references back through an fsspec reference filesystem mapper
    mapper = fsspec.filesystem("reference", fo=refs_path).get_mapper("")
    actual = xr.open_dataset(mapper, engine="kerchunk")

    # what was read via the references must equal what was written
    xrt.assert_equal(actual, expected)
| 30 | + |
| 31 | + |
def test_kerchunk_roundtrip_concat(tmpdir):
    """Round-trip two netCDF files, concatenated along time, through kerchunk.

    The first 2000 time steps of the ``air_temperature`` tutorial dataset are
    split into two halves of 1000 steps, each written to ``tmpdir`` as its own
    netCDF file. Both files are opened with ``open_virtual_dataset`` (passing
    ``indexes={}``), joined with ``xr.concat`` along ``time``, serialized with
    ``to_kerchunk(..., format="json")``, and read back through an fsspec
    ``"reference"`` filesystem. The result must compare equal to the unsplit
    2000-step dataset.
    """
    # set up example xarray dataset
    ds = xr.tutorial.open_dataset("air_temperature", decode_times=False).isel(
        time=slice(None, 2000)
    )

    # split into two datasets
    ds1, ds2 = ds.isel(time=slice(None, 1000)), ds.isel(time=slice(1000, None))

    # save each half to disk as netCDF (in temporary directory)
    ds1.to_netcdf(f"{tmpdir}/air1.nc")
    ds2.to_netcdf(f"{tmpdir}/air2.nc")

    # use open_virtual_dataset to read each file as references
    vds1 = open_virtual_dataset(f"{tmpdir}/air1.nc", indexes={})
    vds2 = open_virtual_dataset(f"{tmpdir}/air2.nc", indexes={})

    # concatenate virtually along time
    vds = xr.concat([vds1, vds2], dim="time", coords="minimal", compat="override")

    # write those references to disk as kerchunk json
    vds.virtualize.to_kerchunk(f"{tmpdir}/refs.json", format="json")

    # use fsspec to read the dataset from disk via the zarr store
    fs = fsspec.filesystem("reference", fo=f"{tmpdir}/refs.json")
    m = fs.get_mapper("")

    roundtrip = xr.open_dataset(m, engine="kerchunk")

    # assert equal to the original (unsplit) dataset
    xrt.assert_equal(roundtrip, ds)
| 66 | + |
| 67 | + |
8 | 68 | def test_open_scalar_variable(tmpdir):
|
9 | 69 | # regression test for GH issue #100
|
10 | 70 |
|
|
0 commit comments