Skip to content

WIP: Add functionality to virtualize GeoTIFFs using async_tiff #524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ hdf = [
"imagecodecs",
"imagecodecs-numcodecs==2024.6.1",
]

tiff = [
"virtual_tiff @ git+https://github.com/virtual-zarr/virtual-tiff",
]
# kerchunk-based parsers
netcdf3 = [
"virtualizarr[remote]",
Expand Down Expand Up @@ -165,6 +167,9 @@ h5netcdf = ">=1.5.0,<2"
[tool.pixi.feature.icechunk-dev.dependencies]
rust = "*"

[tool.pixi.feature.rio.dependencies]
rioxarray = "*"

[tool.pixi.feature.minimum-versions.dependencies]
xarray = "==2025.3.0"
numpy = "==2.0.0"
Expand All @@ -187,10 +192,10 @@ run-tests-html-cov = { cmd = "pytest -n auto --run-network-tests --verbose --cov
[tool.pixi.environments]
min-deps = ["dev", "test", "hdf", "hdf5-lib"] # VirtualiZarr/conftest.py uses h5py, so the minimum set of dependencies for testing still includes the hdf libs
# Inherit from min-deps to get all the test commands, along with optional dependencies
test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib"]
test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311"] # test against python 3.11
test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312"] # test against python 3.12
minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio"]
test = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "tiff", "rio"]
test-py311 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py311", "tiff", "rio"] # test against python 3.11
test-py312 = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "py312", "tiff", "rio"] # test against python 3.12
minio = ["dev", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "hdf5-lib", "py312", "minio", "tiff", "rio"]
minimum-versions = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk", "kerchunk_parquet", "hdf5-lib", "minimum-versions"]
upstream = ["dev", "test", "hdf", "hdf5-lib", "netcdf3", "upstream", "icechunk-dev"]
all = ["dev", "test", "remote", "hdf", "netcdf3", "fits", "icechunk", "kerchunk","kerchunk_parquet", "hdf5-lib", "all_parsers", "all_writers"]
Expand Down
75 changes: 0 additions & 75 deletions virtualizarr/parsers/tiff.py

This file was deleted.

3 changes: 2 additions & 1 deletion virtualizarr/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ def _importorskip(
# Probe optional dependencies by import name. Each `_importorskip` call is
# unpacked into a `has_<pkg>` / `requires_<pkg>` pair; the `requires_*` names
# are applied as decorators on tests elsewhere in the suite.
# NOTE(review): presumably `has_*` is a bool and `requires_*` a pytest skip
# marker -- confirm against the `_importorskip` definition.
has_s3fs, requires_s3fs = _importorskip("s3fs")
has_lithops, requires_lithops = _importorskip("lithops")
has_scipy, requires_scipy = _importorskip("scipy")
has_tifffile, requires_tifffile = _importorskip("tifffile")
has_asynctiff, requires_asynctiff = _importorskip("async_tiff")
has_imagecodecs, requires_imagecodecs = _importorskip("imagecodecs")
has_hdf5plugin, requires_hdf5plugin = _importorskip("hdf5plugin")
has_zarr_python, requires_zarr_python = _importorskip("zarr")
has_dask, requires_dask = _importorskip("dask")
has_obstore, requires_obstore = _importorskip("obstore")
has_rioxarray, requires_rioxarray = _importorskip("rioxarray")
9 changes: 9 additions & 0 deletions virtualizarr/tests/test_parsers/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
warnings.warn("hdf5plugin is required for HDF reader")


@pytest.fixture
def geotiff_file(tmp_path: Path) -> str:
    """Create a GeoTIFF file with air temperature data.

    The first time step of xarray's ``air_temperature`` tutorial dataset is
    written to ``air.tif`` inside ``tmp_path`` using the ``COG`` driver with
    ``DEFLATE`` compression.

    Returns
    -------
    str
        Path of the written file.
    """
    # The ``.rio`` accessor used below only exists once rioxarray has been
    # imported. Import it explicitly, and skip instead of erroring when the
    # optional dependency is not installed.
    pytest.importorskip("rioxarray")

    filepath = tmp_path / "air.tif"
    with xr.tutorial.open_dataset("air_temperature") as ds:
        ds.isel(time=0).rio.to_raster(filepath, driver="COG", COMPRESS="DEFLATE")
    return str(filepath)


@pytest.fixture
def empty_chunks_hdf5_file(tmpdir):
ds = xr.Dataset({"data": []})
Expand Down
20 changes: 20 additions & 0 deletions virtualizarr/tests/test_parsers/test_tiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import numpy as np
import xarray as xr
from virtual_tiff import TIFFParser

from virtualizarr.tests import requires_asynctiff, requires_rioxarray


@requires_asynctiff
@requires_rioxarray
def test_read_geotiff(geotiff_file):
    """Check that a GeoTIFF opened through ``TIFFParser`` matches rioxarray.

    The file is parsed with ``TIFFParser(ifd=0)``, opened with xarray's zarr
    engine and loaded; variable ``"0"`` is then compared against the array
    that ``rioxarray.open_rasterio`` reads from the same file.
    """
    import rioxarray
    from obstore.store import LocalStore

    # Build the parser, then hand it the file URL and a local object store.
    tiff_parser = TIFFParser(ifd=0)
    file_url = f"file://{geotiff_file}"
    manifest_store = tiff_parser(file_url=file_url, object_store=LocalStore())

    # Open the parser output with the zarr engine and load it into memory.
    virtual_ds = xr.open_dataset(
        manifest_store,
        engine="zarr",
        consolidated=False,
        zarr_format=3,
    )
    loaded = virtual_ds.load()
    assert isinstance(loaded, xr.Dataset)

    # Squeeze both sides so length-1 dimensions do not affect the comparison.
    reference = rioxarray.open_rasterio(geotiff_file)
    want = reference.data.squeeze()
    got = loaded["0"].data.squeeze()
    np.testing.assert_allclose(got, want)
Loading