From 8bb29ba03613ba7e06fd4068439c5effe7cb646e Mon Sep 17 00:00:00 2001
From: Matt McCormick
Date: Fri, 22 Nov 2024 12:01:59 -0500
Subject: [PATCH] ENH: Add RFC 2, OME-Zarr v0.5 support

---
 .github/workflows/test.yml              |   7 ++
 ngff_zarr/__init__.py                   |   2 +-
 ngff_zarr/_zarr_kwargs.py               |   8 ++
 ngff_zarr/_zarr_open_array.py           |   9 ++
 ngff_zarr/cli.py                        |   9 +-
 ngff_zarr/config.py                     |  11 +-
 ngff_zarr/from_ngff_zarr.py             |  71 ++++++++++---
 ngff_zarr/multiscales.py                |   2 +-
 ngff_zarr/ngff_image.py                 |   2 +-
 ngff_zarr/to_multiscales.py             |  35 ++++--
 ngff_zarr/to_ngff_image.py              |   8 +-
 ngff_zarr/to_ngff_zarr.py               |  65 +++++++++---
 ngff_zarr/v04/__init__.py               |   0
 ngff_zarr/{ => v04}/zarr_metadata.py    |   0
 ngff_zarr/v05/__init__.py               |   0
 ngff_zarr/v05/zarr_metadata.py          |  12 +++
 test/_data.py                           | 136 ++++++++++++++++++------
 test/test_cli_input_to_ngff_image.py    |  10 ++
 test/test_from_ngff_zarr.py             |  21 ++--
 test/test_from_ngff_zarr_tensorstore.py |  33 ++++++
 test/test_large_serialization.py        |   2 +-
 test/test_memory_usage.py               |   5 +-
 test/test_ngff_validation.py            |  17 ++-
 test/test_task_count.py                 |   5 +-
 test/test_to_ngff_zarr_dask_image.py    |   1 +
 test/test_to_ngff_zarr_itk.py           |   3 +
 test/test_to_ngff_zarr_itkwasm.py       |  13 ++-
 test/test_to_ngff_zarr_kvikio.py        |   5 +-
 test/test_to_ngff_zarr_rfc2_zarr_v3.py  |  31 ++++++
 29 files changed, 416 insertions(+), 107 deletions(-)
 create mode 100644 ngff_zarr/_zarr_kwargs.py
 create mode 100644 ngff_zarr/_zarr_open_array.py
 create mode 100644 ngff_zarr/v04/__init__.py
 rename ngff_zarr/{ => v04}/zarr_metadata.py (100%)
 create mode 100644 ngff_zarr/v05/__init__.py
 create mode 100644 ngff_zarr/v05/zarr_metadata.py
 create mode 100644 test/test_from_ngff_zarr_tensorstore.py
 create mode 100644 test/test_to_ngff_zarr_rfc2_zarr_v3.py

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ffbbbe22..fbc81799 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -32,6 +32,13 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install -e ".[tensorstore]"
 
+      - name: Install Zarr Python 3 for OME-Zarr 0.5
+        if:
+          ${{ matrix.python-version != '3.9' && matrix.python-version != '3.10'
+          }}
+        run: |
+          python -m pip install --upgrade --pre "zarr==3.0.0b2"
+
       - name: Test with pytest
         run: |
           pytest --junitxml=junit/test-results.xml
diff --git a/ngff_zarr/__init__.py b/ngff_zarr/__init__.py
index e97e67ce..7eed3450 100644
--- a/ngff_zarr/__init__.py
+++ b/ngff_zarr/__init__.py
@@ -18,7 +18,7 @@ from .to_ngff_image import to_ngff_image
 from .to_ngff_zarr import to_ngff_zarr
 from .validate import validate
-from .zarr_metadata import (
+from .v04.zarr_metadata import (
     AxesType,
     SpatialDims,
     SupportedDims,
diff --git a/ngff_zarr/_zarr_kwargs.py b/ngff_zarr/_zarr_kwargs.py
new file mode 100644
index 00000000..19b806ab
--- /dev/null
+++ b/ngff_zarr/_zarr_kwargs.py
@@ -0,0 +1,8 @@
+import zarr
+from packaging import version
+
+zarr_version = version.parse(zarr.__version__)
+if zarr_version >= version.parse("3.0.0b1"):
+    zarr_kwargs = {}
+else:
+    zarr_kwargs = {"dimension_separator": "/"}
diff --git a/ngff_zarr/_zarr_open_array.py b/ngff_zarr/_zarr_open_array.py
new file mode 100644
index 00000000..5aa09683
--- /dev/null
+++ b/ngff_zarr/_zarr_open_array.py
@@ -0,0 +1,9 @@
+import zarr
+from packaging import version
+
+zarr_version = version.parse(zarr.__version__)
+if zarr_version >= version.parse("3.0.0b1"):
+    from zarr.api.synchronous import open_array
+else:
+    from zarr.creation import open_array
+open_array = open_array
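The two new helper modules above are the compatibility seam for this patch: `zarr_kwargs` supplies `dimension_separator="/"` only under Zarr Python 2 (nested, `/`-separated chunk keys are already the default in Zarr Python 3), and `_zarr_open_array` resolves the relocated `open_array` entry point. A minimal sketch of how the rest of the package calls them; the shape, chunking, and store path here are illustrative, not taken from the patch:

```python
import numpy as np

from ngff_zarr._zarr_kwargs import zarr_kwargs
from ngff_zarr._zarr_open_array import open_array

# The same call works against Zarr Python 2 and 3: zarr_kwargs expands
# to {"dimension_separator": "/"} on Zarr 2 and to {} on Zarr 3.
arr = open_array(
    shape=(64, 64),
    chunks=(32, 32),
    dtype=np.uint8,
    store="example.zarr",  # illustrative path
    path="scale0/image",
    mode="a",
    **zarr_kwargs,
)
arr[:] = 0
```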
diff --git a/ngff_zarr/cli.py b/ngff_zarr/cli.py
index 0b6207c0..fbaf0c47 100755
--- a/ngff_zarr/cli.py
+++ b/ngff_zarr/cli.py
@@ -41,7 +41,8 @@ from .to_multiscales import to_multiscales
 from .to_ngff_image import to_ngff_image
 from .to_ngff_zarr import to_ngff_zarr
-from .zarr_metadata import is_unit_supported
+from .v04.zarr_metadata import is_unit_supported
+from ._zarr_kwargs import zarr_kwargs
 
 
 def _multiscales_to_ngff_zarr(
@@ -235,9 +236,7 @@ def main():
         cache_dir = Path(args.cache_dir).resolve()
         if not cache_dir.exists():
             Path.makedirs(cache_dir, parents=True)
-        config.cache_store = zarr.storage.DirectoryStore(
-            cache_dir, dimension_separator="/"
-        )
+        config.cache_store = zarr.storage.DirectoryStore(cache_dir, **zarr_kwargs)
 
     console = Console()
     progress = RichProgress(
@@ -304,7 +303,7 @@ def shutdown_client(sig_id, frame):  # noqa: ARG001
     )
     output_store = None
     if args.output and output_backend is ConversionBackend.NGFF_ZARR:
-        output_store = DirectoryStore(args.output, dimension_separator="/")
+        output_store = DirectoryStore(args.output, **zarr_kwargs)
 
     subtitle = "[red]generation"
     if not args.output:
diff --git a/ngff_zarr/config.py b/ngff_zarr/config.py
index 1d6616a9..bfc1626b 100644
--- a/ngff_zarr/config.py
+++ b/ngff_zarr/config.py
@@ -2,9 +2,9 @@
 from pathlib import Path
 
 import dask.config
-import zarr
 from platformdirs import user_cache_dir
 from zarr.storage import StoreLike
+from ._zarr_kwargs import zarr_kwargs
 
 if dask.config.get("temporary-directory") is not None:
     _store_dir = dask.config.get("temporary-directory")
@@ -13,7 +13,14 @@
 
 
 def default_store_factory():
-    return zarr.storage.DirectoryStore(_store_dir, dimension_separator="/")
+    try:
+        from zarr.storage import DirectoryStore
+
+        return DirectoryStore(_store_dir, **zarr_kwargs)
+    except ImportError:
+        from zarr.storage import LocalStore
+
+        return LocalStore(_store_dir)
 
 
 try:
diff --git a/ngff_zarr/from_ngff_zarr.py b/ngff_zarr/from_ngff_zarr.py
index 4eaab9ec..3f1f79f9 100644
--- a/ngff_zarr/from_ngff_zarr.py
+++ b/ngff_zarr/from_ngff_zarr.py
@@ -1,30 +1,44 @@
 from collections.abc import MutableMapping
 from pathlib import Path
-from typing import Union
+from typing import Union, Optional
+from packaging import version
 
 import dask.array
 import zarr
-from zarr.storage import BaseStore
+import zarr.storage
+
+# Zarr Python 3
+if hasattr(zarr.storage, "StoreLike"):
+    StoreLike = zarr.storage.StoreLike
+else:
+    StoreLike = Union[MutableMapping, str, Path, zarr.storage.BaseStore]
 
 from .ngff_image import NgffImage
 from .to_multiscales import Multiscales
-from .zarr_metadata import Axis, Dataset, Metadata, Scale, Translation
+from .v04.zarr_metadata import Axis, Dataset, Scale, Translation
 from .validate import validate as validate_ngff
 
+zarr_version = version.parse(zarr.__version__)
+zarr_version_major = zarr_version.major
+
 
 def from_ngff_zarr(
-    store: Union[MutableMapping, str, Path, BaseStore],
+    store: StoreLike,
     validate: bool = False,
+    version: Optional[str] = None,
 ) -> Multiscales:
     """
     Read an OME-Zarr NGFF Multiscales data structure from a Zarr store.
 
-    store : MutableMapping, str or Path, zarr.storage.BaseStore
+    store : StoreLike
        Store or path to directory in file system.
 
     validate : bool
        If True, validate the NGFF metadata against the schema.
 
+    version : str, optional
+       OME-Zarr version, if known.
+
    Returns
    -------
 
    multiscales: multiscale ngff image with dask-chunked arrays for data
 
    """
 
-    root = zarr.open_group(store, mode="r")
+    format_kwargs = {}
+    if version and zarr_version_major >= 3:
+        format_kwargs = {"zarr_format": 2} if version == "0.4" else {"zarr_format": 3}
+    root = zarr.open_group(store, mode="r", **format_kwargs)
+    root_attrs = root.attrs.asdict()
+
+    if not version:
+        if "ome" in root_attrs:
+            version = root_attrs["ome"]["version"]
+        else:
+            version = root_attrs["multiscales"][0].get("version", "0.4")
+
     if validate:
-        validate_ngff(root.attrs.asdict())
-    metadata = root.attrs["multiscales"][0]
+        validate_ngff(root_attrs, version=version)
+
+    if "ome" in root_attrs:
+        metadata = root_attrs["ome"]["multiscales"][0]
+    else:
+        metadata = root_attrs["multiscales"][0]
 
     dims = [a["name"] for a in metadata["axes"]]
 
@@ -82,12 +111,24 @@ def from_ngff_zarr(
     coordinateTransformations = None
     if "coordinateTransformations" in metadata:
         coordinateTransformations = metadata["coordinateTransformations"]
-    metadata = Metadata(
-        axes=axes,
-        datasets=datasets,
-        name=name,
-        version=metadata["version"],
-        coordinateTransformations=coordinateTransformations,
-    )
+    if version == "0.5":
+        from .v05.zarr_metadata import Metadata
+
+        metadata = Metadata(
+            axes=axes,
+            datasets=datasets,
+            name=name,
+            coordinateTransformations=coordinateTransformations,
+        )
+    else:
+        from .v04.zarr_metadata import Metadata
+
+        metadata = Metadata(
+            axes=axes,
+            datasets=datasets,
+            name=name,
+            version=metadata["version"],
+            coordinateTransformations=coordinateTransformations,
+        )
 
     return Multiscales(images, metadata)
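With the changes above, `from_ngff_zarr` either auto-detects the OME-Zarr version (an `ome` attributes key implies 0.5; otherwise `multiscales[0]["version"]` is consulted, defaulting to "0.4") or accepts it explicitly, which also pins the Zarr format when Zarr Python 3 is installed. A minimal usage sketch; the store path is illustrative:

```python
from ngff_zarr import from_ngff_zarr

# Auto-detect the OME-Zarr version from the root group attributes.
multiscales = from_ngff_zarr("image.ome.zarr")

# Or pin it: "0.4" opens the store as Zarr format 2, "0.5" as format 3.
multiscales = from_ngff_zarr("image.ome.zarr", validate=True, version="0.5")
```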
diff --git a/ngff_zarr/multiscales.py b/ngff_zarr/multiscales.py
index 48c64957..22c86dee 100644
--- a/ngff_zarr/multiscales.py
+++ b/ngff_zarr/multiscales.py
@@ -3,7 +3,7 @@
 
 from .methods import Methods
 from .ngff_image import NgffImage
-from .zarr_metadata import Metadata
+from .v04.zarr_metadata import Metadata
 
 
 @dataclass
diff --git a/ngff_zarr/ngff_image.py b/ngff_zarr/ngff_image.py
index bb0e807c..0ef784bc 100644
--- a/ngff_zarr/ngff_image.py
+++ b/ngff_zarr/ngff_image.py
@@ -3,7 +3,7 @@
 
 from dask.array.core import Array as DaskArray
 
-from .zarr_metadata import Units
+from .v04.zarr_metadata import Units
 
 ComputedCallback = Callable[[], None]
diff --git a/ngff_zarr/to_multiscales.py b/ngff_zarr/to_multiscales.py
index 106ab452..24663e1d 100644
--- a/ngff_zarr/to_multiscales.py
+++ b/ngff_zarr/to_multiscales.py
@@ -11,7 +11,14 @@
 import zarr
 from dask.array.core import Array as DaskArray
 from numpy.typing import ArrayLike
-from zarr.core import Array as ZarrArray
+
+try:
+    from zarr.core import Array as ZarrArray
+except ImportError:
+    from zarr.core.array import Array as ZarrArray
+from ._zarr_kwargs import zarr_kwargs
+from ._zarr_open_array import open_array
+import zarr.storage
 
 from .config import config
 from .memory_usage import memory_usage
@@ -30,7 +37,7 @@
 from .ngff_image import NgffImage
 from .rich_dask_progress import NgffProgress, NgffProgressCallback
 from .to_ngff_image import to_ngff_image
-from .zarr_metadata import Axis, Dataset, Metadata, Scale, Translation
+from .v04.zarr_metadata import Axis, Dataset, Metadata, Scale, Translation
 
 
 def _ngff_image_scale_factors(ngff_image, min_length, out_chunks):
@@ -82,10 +89,18 @@ def _large_image_serialization(
     def remove_from_cache_store(sig_id, frame):  # noqa: ARG001
         nonlocal base_path_removed
         if not base_path_removed:
-            if isinstance(cache_store, zarr.storage.DirectoryStore):
+            if hasattr(zarr.storage, "DirectoryStore") and isinstance(
+                cache_store, zarr.storage.DirectoryStore
+            ):
                 full_path = Path(cache_store.dir_path()) / base_path
                 if full_path.exists():
                     shutil.rmtree(full_path, ignore_errors=True)
+            elif hasattr(zarr.storage, "LocalStore") and isinstance(
+                cache_store, zarr.storage.LocalStore
+            ):
+                full_path = Path(cache_store.root) / base_path
+                if full_path.exists():
+                    shutil.rmtree(full_path, ignore_errors=True)
             else:
                 zarr.storage.rmdir(cache_store, base_path)
             base_path_removed = True
@@ -129,14 +144,14 @@ def remove_from_cache_store(sig_id, frame):  # noqa: ARG001
             slabs.chunks,
             meta=slabs,
         )
-        zarr_array = zarr.creation.open_array(
+        zarr_array = open_array(
             shape=data.shape,
             chunks=chunks,
             dtype=data.dtype,
             store=cache_store,
             path=path,
             mode="a",
-            dimension_separator="/",
+            **zarr_kwargs,
         )
 
         n_slabs = int(np.ceil(data.shape[z_index] / slab_slices))
@@ -164,7 +179,7 @@ def remove_from_cache_store(sig_id, frame):  # noqa: ARG001
                 overwrite=False,
                 compute=True,
                 return_stored=False,
-                dimension_separator="/",
+                **zarr_kwargs,
             )
             data = dask.array.from_zarr(cache_store, component=path)
         if optimized_chunks < data.shape[z_index] and slab_slices < optimized_chunks:
@@ -173,14 +188,14 @@ def remove_from_cache_store(sig_id, frame):  # noqa: ARG001
             path = f"{base_path}/optimized_chunks"
             chunks = tuple([c[0] for c in optimized.chunks])
             data = data.rechunk(chunks)
-            zarr_array = zarr.creation.open_array(
+            zarr_array = open_array(
                 shape=data.shape,
                 chunks=chunks,
                 dtype=data.dtype,
                 store=cache_store,
                 path=path,
                 mode="a",
-                dimension_separator="/",
+                **zarr_kwargs,
             )
             n_slabs = int(np.ceil(data.shape[z_index] / optimized_chunks))
             for slab_index in range(n_slabs):
@@ -205,7 +220,7 @@ def remove_from_cache_store(sig_id, frame):  # noqa: ARG001
                     overwrite=False,
                     compute=True,
                     return_stored=False,
-                    dimension_separator="/",
+                    **zarr_kwargs,
                 )
             data = dask.array.from_zarr(cache_store, component=path)
         else:
@@ -223,7 +238,7 @@ def remove_from_cache_store(sig_id, frame):  # noqa: ARG001
                 overwrite=False,
                 compute=True,
                 return_stored=False,
-                dimension_separator="/",
+                **zarr_kwargs,
             )
             data = dask.array.from_zarr(cache_store, component=path)
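The cleanup handler above now recognizes both store flavors because the store backing large-image serialization is user-configurable. A sketch of pointing it at a custom directory in a version-agnostic way, mirroring the `DirectoryStore`/`LocalStore` fallback used in `config.py`; the cache directory name is illustrative:

```python
from ngff_zarr.config import config

try:
    # Zarr Python 2
    from zarr.storage import DirectoryStore

    from ngff_zarr._zarr_kwargs import zarr_kwargs

    config.cache_store = DirectoryStore("/tmp/ngff-cache.zarr", **zarr_kwargs)
except ImportError:
    # Zarr Python 3: DirectoryStore was removed in favor of LocalStore
    from zarr.storage import LocalStore

    config.cache_store = LocalStore("/tmp/ngff-cache.zarr")
```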
diff --git a/ngff_zarr/to_ngff_image.py b/ngff_zarr/to_ngff_image.py
index 75f9626a..11244083 100644
--- a/ngff_zarr/to_ngff_image.py
+++ b/ngff_zarr/to_ngff_image.py
@@ -4,11 +4,15 @@
 import dask
 from dask.array.core import Array as DaskArray
 from numpy.typing import ArrayLike
-from zarr.core import Array as ZarrArray
+
+try:
+    from zarr.core import Array as ZarrArray
+except ImportError:
+    from zarr.core.array import Array as ZarrArray
 
 from .methods._support import _spatial_dims
 from .ngff_image import NgffImage
-from .zarr_metadata import SupportedDims, Units
+from .v04.zarr_metadata import SupportedDims, Units
 
 
 def to_ngff_image(
diff --git a/ngff_zarr/to_ngff_zarr.py b/ngff_zarr/to_ngff_zarr.py
index 5a6785c8..a288435e 100644
--- a/ngff_zarr/to_ngff_zarr.py
+++ b/ngff_zarr/to_ngff_zarr.py
@@ -3,6 +3,7 @@
 from dataclasses import asdict
 from pathlib import Path, PurePosixPath
 from typing import Optional, Union
+from packaging import version
 
 if sys.version_info < (3, 10):
     import importlib_metadata
@@ -11,9 +12,19 @@
 
 import dask.array
 import numpy as np
-import zarr
 from itkwasm import array_like_to_numpy_array
-from zarr.storage import BaseStore
+
+import zarr
+import zarr.storage
+from ._zarr_open_array import open_array
+
+# Zarr Python 3
+if hasattr(zarr.storage, "StoreLike"):
+    StoreLike = zarr.storage.StoreLike
+else:
+    StoreLike = Union[MutableMapping, str, Path, zarr.storage.BaseStore]
+from ._zarr_kwargs import zarr_kwargs
 
 from .config import config
 from .memory_usage import memory_usage
@@ -22,6 +33,9 @@
 from .rich_dask_progress import NgffProgress, NgffProgressCallback
 from .to_multiscales import to_multiscales
 
+zarr_version = version.parse(zarr.__version__)
+zarr_version_major = zarr_version.major
+
 
 def _pop_metadata_optionals(metadata_dict):
     for ax in metadata_dict["axes"]:
@@ -34,9 +48,7 @@ def _pop_metadata_optionals(metadata_dict):
     return metadata_dict
 
 
-def _prep_for_to_zarr(
-    store: Union[MutableMapping, str, Path, BaseStore], arr: dask.array.Array
-) -> dask.array.Array:
+def _prep_for_to_zarr(store: StoreLike, arr: dask.array.Array) -> dask.array.Array:
     try:
         importlib_metadata.distribution("kvikio")
         _KVIKIO_AVAILABLE = True
@@ -81,11 +93,12 @@ def _write_with_tensorstore(store_path: str, array, region, chunks) -> None:
 
 
 def to_ngff_zarr(
-    store: Union[MutableMapping, str, Path, BaseStore],
+    store: StoreLike,
     multiscales: Multiscales,
+    version: str = "0.4",
     overwrite: bool = True,
     use_tensorstore: bool = False,
-    chunk_store: Optional[Union[MutableMapping, str, Path, BaseStore]] = None,
+    chunk_store: Optional[StoreLike] = None,
     progress: Optional[Union[NgffProgress, NgffProgressCallback]] = None,
     **kwargs,
 ) -> None:
     """
     Write an image pixel array and metadata to a Zarr store with the OME-NGFF standard data model.
 
     :param store: Store or path to directory in file system.
-    :type  store: MutableMapping, str or Path, zarr.storage.BaseStore
+    :type  store: StoreLike
 
     :param multiscales: Multiscales OME-NGFF image pixel data and metadata. Can be generated with ngff_zarr.to_multiscales.
     :type  multiscales: Multiscales
 
+    :param version: OME-Zarr specification version: "0.4" (the default) or "0.5".
+    :type  version: str, optional
+
     :param overwrite: If True, delete any pre-existing data in `store` before creating groups.
     :type  overwrite: bool, optional
 
     :param chunk_store: Separate storage for chunks. If not provided, `store` will be used
         for storage of both chunks and metadata.
-    :type  chunk_store: MutableMapping, str or Path, zarr.storage.BaseStore, optional
+    :type  chunk_store: StoreLike, optional
 
     :type progress: RichDaskProgress
     :param progress: Optional progress logger
@@ -120,11 +136,27 @@ def to_ngff_zarr(
     else:
         raise ValueError("Tensorstore requires a path-like store")
 
+    if version != "0.4" and version != "0.5":
+        raise ValueError(f"Unsupported version: {version}")
+
     metadata_dict = asdict(multiscales.metadata)
     metadata_dict = _pop_metadata_optionals(metadata_dict)
     metadata_dict["@type"] = "ngff:Image"
 
-    root = zarr.group(store, overwrite=overwrite, chunk_store=chunk_store)
-    root.attrs["multiscales"] = [metadata_dict]
+    # OME-Zarr 0.4 is stored as Zarr format 2; 0.5 (RFC 2) as Zarr format 3
+    zarr_format = 2 if version == "0.4" else 3
+    format_kwargs = {"zarr_format": zarr_format} if zarr_version_major >= 3 else {}
+    root = zarr.open_group(
+        store,
+        mode="w" if overwrite else "a",
+        chunk_store=chunk_store,
+        **format_kwargs,
+    )
+
+    if version == "0.4":
+        root.attrs["multiscales"] = [metadata_dict]
+    else:
+        # RFC 2: OME metadata lives under the "ome" key of the group attributes
+        root.attrs["ome"] = {"version": version, "multiscales": [metadata_dict]}
 
     nscales = len(multiscales.images)
     if progress:
@@ -160,14 +201,15 @@ def to_ngff_zarr(
         shrink_factors.append(1)
 
         chunks = tuple([c[0] for c in arr.chunks])
-        zarr_array = zarr.creation.open_array(
+
+        zarr_array = open_array(
             shape=arr.shape,
             chunks=chunks,
             dtype=arr.dtype,
             store=store,
             path=path,
             mode="a",
-            dimension_separator="/",
+            **zarr_kwargs,
         )
 
         shape = image.data.shape
@@ -311,7 +353,7 @@ def to_ngff_zarr(
                     overwrite=False,
                     compute=True,
                     return_stored=False,
-                    dimension_separator="/",
+                    **zarr_kwargs,
                     **kwargs,
                 )
             else:
@@ -334,7 +376,7 @@ def to_ngff_zarr(
                 overwrite=False,
                 compute=True,
                 return_stored=False,
-                dimension_separator="/",
+                **zarr_kwargs,
                 **kwargs,
             )
 
@@ -383,4 +425,4 @@ def to_ngff_zarr(
             callback()
         image.computed_callbacks = []
 
-    zarr.consolidate_metadata(store)
+    zarr.consolidate_metadata(store, **format_kwargs)
diff --git a/ngff_zarr/v04/__init__.py b/ngff_zarr/v04/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ngff_zarr/zarr_metadata.py b/ngff_zarr/v04/zarr_metadata.py
similarity index 100%
rename from ngff_zarr/zarr_metadata.py
rename to ngff_zarr/v04/zarr_metadata.py
diff --git a/ngff_zarr/v05/__init__.py b/ngff_zarr/v05/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ngff_zarr/v05/zarr_metadata.py b/ngff_zarr/v05/zarr_metadata.py
new file mode 100644
index 00000000..81de415d
--- /dev/null
+++ b/ngff_zarr/v05/zarr_metadata.py
@@ -0,0 +1,12 @@
+from typing import List, Optional
+from dataclasses import dataclass
+
+from ..v04.zarr_metadata import Axis, Transform, Dataset
+
+
+@dataclass
+class Metadata:
+    axes: List[Axis]
+    datasets: List[Dataset]
+    coordinateTransformations: Optional[List[Transform]]
+    name: str = "image"
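With the v0.5 `Metadata` dataclass in place (it drops the per-multiscales `version` field, which RFC 2 moves into the top-level `ome` object), writing RFC 2 output is a one-argument change at the public API. A minimal sketch; the array contents and output paths are illustrative:

```python
import numpy as np

from ngff_zarr import to_multiscales, to_ngff_image, to_ngff_zarr

image = to_ngff_image(np.zeros((64, 64), dtype=np.uint8), dims=("y", "x"))
multiscales = to_multiscales(image)

# OME-Zarr 0.4 (Zarr format 2) remains the default.
to_ngff_zarr("image_v04.ome.zarr", multiscales)

# OME-Zarr 0.5 (RFC 2, Zarr format 3) requires Zarr Python >= 3.0.0b1.
to_ngff_zarr("image_v05.ome.zarr", multiscales, version="0.5")
```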
diff --git a/test/_data.py b/test/_data.py
index 6012fec5..0e640ec5 100644
--- a/test/_data.py
+++ b/test/_data.py
@@ -1,21 +1,29 @@
 import sys
 from pathlib import Path
 import json
+import asyncio
+from packaging import version
 
+import zarr
 import pooch
 import pytest
 from itkwasm_image_io import imread
 from ngff_zarr import itk_image_to_ngff_image, to_ngff_zarr
-from zarr.storage import DirectoryStore, MemoryStore
+from ngff_zarr._zarr_kwargs import zarr_kwargs
+
+from zarr.storage import MemoryStore
 from deepdiff import DeepDiff
 
-test_data_ipfs_cid = "bafybeif6s65ezzpejhcj5366nwvyljfcj4brmefqtjxymbrcicbo3ggcei"
-test_data_sha256 = "c70c024e79d3dd6ecff86b9da1367b761d5f157f5cdad529c1fe482d2bce699a"
+test_data_ipfs_cid = "bafybeif55lgkigwmejsgm3bto355ks4exydcqul75lblvsuu3hfctflnt4"
+test_data_sha256 = "f791e80bd8ab7b264293810b4aa7253813e008ae6b051afdc2a852766d8a83a8"
 
 test_dir = Path(__file__).resolve().parent
 extract_dir = "data"
 test_data_dir = test_dir / extract_dir
 
+zarr_version = version.parse(zarr.__version__)
+zarr_version_major = zarr_version.major
+
 
 @pytest.fixture(scope="package")
 def input_images():
@@ -23,8 +31,8 @@ def input_images():
     pooch.retrieve(
         fname="data.tar.gz",
         path=test_dir,
-        url=f"https://itk.mypinata.cloud/ipfs/{test_data_ipfs_cid}/data.tar.gz",
-        # url=f"https://{test_data_ipfs_cid}.ipfs.w3s.link/ipfs/{test_data_ipfs_cid}/data.tar.gz",
+        # url=f"https://itk.mypinata.cloud/ipfs/{test_data_ipfs_cid}/data.tar.gz",
+        url=f"https://{test_data_ipfs_cid}.ipfs.w3s.link/ipfs/{test_data_ipfs_cid}/data.tar.gz",
         known_hash=f"sha256:{test_data_sha256}",
         processor=untar,
     )
@@ -53,20 +61,54 @@ def input_images():
     return result
 
 
+async def collect_values(async_gen):
+    return [item async for item in async_gen]
+
+
+def store_keys(store):
+    zarr_version = version.parse(zarr.__version__)
+    if zarr_version >= version.parse("3.0.0b1"):
+        keys = asyncio.run(collect_values(store.list()))
+    else:
+        keys = store.keys()
+    return set(keys)
+
+
+async def async_store_contents(store, keys):
+    return {k: (await store.get(k)).to_bytes() for k in keys}
+
+
+async def async_memory_store_contents(store, keys):
+    from zarr.core.buffer import default_buffer_prototype
+
+    return {
+        k: (await store.get(k, default_buffer_prototype())).to_bytes() for k in keys
+    }
+
+
+def store_contents(store, keys):
+    zarr_version = version.parse(zarr.__version__)
+    if zarr_version >= version.parse("3.0.0b1"):
+        if isinstance(store, MemoryStore):
+            contents = asyncio.run(async_memory_store_contents(store, keys))
+        else:
+            contents = asyncio.run(async_store_contents(store, keys))
+    else:
+        contents = {k: store[k] for k in keys}
+    return contents
+
+
 def store_equals(baseline_store, test_store):
-    baseline_keys = set(baseline_store.keys())
-    test_keys = set(test_store.keys())
-    json_keys = {".zmetadata", ".zattrs", ".zgroup"}
-
-    if baseline_keys != test_keys:
-        sys.stderr.write("test keys != baseline keys\n")
-        sys.stderr.write(f"baseline - test: {baseline_keys.difference(test_keys)}, \n")
-        sys.stderr.write(f"test - baseline: {test_keys.difference(baseline_keys)}, \n")
-        return False
+    baseline_keys = store_keys(baseline_store)
+    test_keys = store_keys(test_store)
+    json_keys = {".zmetadata", ".zattrs", ".zgroup", "zarr.json"}
+    baseline_contents = store_contents(baseline_store, baseline_keys)
+    test_contents = store_contents(test_store, test_keys)
+
     for k in baseline_keys:
         if k in json_keys:
-            baseline_metadata = json.loads(baseline_store[k].decode("utf-8"))
-            test_metadata = json.loads(test_store[k].decode("utf-8"))
+            baseline_metadata = json.loads(baseline_contents[k].decode("utf-8"))
+            test_metadata = json.loads(test_contents[k].decode("utf-8"))
 
             diff = DeepDiff(baseline_metadata, test_metadata, ignore_order=True)
             if diff:
@@ -74,29 +116,59 @@ def store_equals(baseline_store, test_store):
                 sys.stderr.write(f"Differences: {diff}\n")
                 return False
         else:
-            if baseline_store[k] != test_store[k]:
+            if k not in test_keys:
+                sys.stderr.write(f"baseline key {k} not in test keys\n")
+                sys.stderr.write(f"test keys: {test_keys}\n")
+                return False
+            if (
+                baseline_contents.get(k) != test_contents.get(k)
+                and ".zattrs" not in k
+                and ".zgroup" not in k
+                and "zarr.json" not in k
+            ):
                 sys.stderr.write(f"test value != baseline value for key {k}\n")
-                sys.stderr.write(f"baseline: {baseline_store[k]}, \n")
-                sys.stderr.write(f"test: {test_store[k]}, \n")
+                sys.stderr.write(f"baseline: {baseline_contents[k]}, \n")
+                sys.stderr.write(f"test: {test_contents[k]}, \n")
                 return False
     return True
 
 
-def verify_against_baseline(dataset_name, baseline_name, multiscales):
-    baseline_store = DirectoryStore(
-        test_data_dir / f"baseline/{dataset_name}/{baseline_name}",
-        dimension_separator="/",
-    )
-    test_store = MemoryStore(dimension_separator="/")
-    to_ngff_zarr(test_store, multiscales)
+def verify_against_baseline(dataset_name, baseline_name, multiscales, version="0.4"):
+    try:
+        from zarr.storage import DirectoryStore
+
+        baseline_store = DirectoryStore(
+            test_data_dir / f"baseline/v{version}/{dataset_name}/{baseline_name}",
+            **zarr_kwargs,
+        )
+    except ImportError:
+        from zarr.storage import LocalStore
+
+        baseline_store = LocalStore(
+            test_data_dir / f"baseline/v{version}/{dataset_name}/{baseline_name}"
+        )
+
+    test_store = MemoryStore()
+    to_ngff_zarr(test_store, multiscales, version=version)
     assert store_equals(baseline_store, test_store)
 
 
-def store_new_multiscales(dataset_name, baseline_name, multiscales):
+def store_new_multiscales(dataset_name, baseline_name, multiscales, version="0.4"):
     """Helper method for writing output results to disk for later upload as test baseline"""
-    store = DirectoryStore(
-        test_data_dir / f"baseline/{dataset_name}/{baseline_name}",
-        dimension_separator="/",
-    )
-    to_ngff_zarr(store, multiscales)
+    try:
+        from zarr.storage import DirectoryStore
+
+        store = DirectoryStore(
+            test_data_dir
+            / f"baseline/zarr{zarr_version_major}/v{version}/{dataset_name}/{baseline_name}",
+            **zarr_kwargs,
+        )
+    except ImportError:
+        from zarr.storage import LocalStore
+
+        store = LocalStore(
+            test_data_dir
+            / f"baseline/zarr{zarr_version_major}/v{version}/{dataset_name}/{baseline_name}"
+        )
+    to_ngff_zarr(store, multiscales, version=version)
sys.stderr.write(f"test keys: {test_keys}\n") + return False + if ( + baseline_contents.get(k) != test_contents.get(k) + and ".zattrs" not in k + and ".zgroup" not in k + and "zarr.json" not in k + ): sys.stderr.write(f"test value != baseline value for key {k}\n") - sys.stderr.write(f"baseline: {baseline_store[k]}, \n") - sys.stderr.write(f"test: {test_store[k]}, \n") + sys.stderr.write(f"baseline: {baseline_contents[k]}, \n") + sys.stderr.write(f"test: {test_contents[k]}, \n") return False return True -def verify_against_baseline(dataset_name, baseline_name, multiscales): - baseline_store = DirectoryStore( - test_data_dir / f"baseline/{dataset_name}/{baseline_name}", - dimension_separator="/", - ) - test_store = MemoryStore(dimension_separator="/") - to_ngff_zarr(test_store, multiscales) +def verify_against_baseline(dataset_name, baseline_name, multiscales, version="0.4"): + try: + from zarr.storage import DirectoryStore + + baseline_store = DirectoryStore( + test_data_dir / f"baseline/v{version}/{dataset_name}/{baseline_name}", + **zarr_kwargs, + ) + except ImportError: + from zarr.storage import LocalStore + + baseline_store = LocalStore( + test_data_dir / f"baseline/v{version}/{dataset_name}/{baseline_name}" + ) + + test_store = MemoryStore() + to_ngff_zarr(test_store, multiscales, version=version) assert store_equals(baseline_store, test_store) -def store_new_multiscales(dataset_name, baseline_name, multiscales): +def store_new_multiscales(dataset_name, baseline_name, multiscales, version="0.4"): """Helper method for writing output results to disk for later upload as test baseline""" - store = DirectoryStore( - test_data_dir / f"baseline/{dataset_name}/{baseline_name}", - dimension_separator="/", - ) - to_ngff_zarr(store, multiscales) + try: + from zarr.storage import DirectoryStore + + store = DirectoryStore( + test_data_dir + / f"baseline/zarr{zarr_version_major}/v{version}/{dataset_name}/{baseline_name}", + **zarr_kwargs, + ) + except ImportError: + from zarr.storage import LocalStore + + store = LocalStore( + test_data_dir + / f"baseline/zarr{zarr_version_major}/v{version}/{dataset_name}/{baseline_name}" + ) + to_ngff_zarr(store, multiscales, version=version) diff --git a/test/test_cli_input_to_ngff_image.py b/test/test_cli_input_to_ngff_image.py index bb6fdd84..07a6a95a 100644 --- a/test/test_cli_input_to_ngff_image.py +++ b/test/test_cli_input_to_ngff_image.py @@ -1,7 +1,13 @@ +import pytest +import zarr +from packaging import version + from ngff_zarr import ConversionBackend, cli_input_to_ngff_image from ._data import test_data_dir +zarr_version = version.parse(zarr.__version__) + def test_cli_input_to_ngff_image_itk(input_images): # noqa: ARG001 input = [ @@ -29,6 +35,10 @@ def test_cli_input_to_ngff_image_itk_list(input_images): # noqa: ARG001 assert image.dims == ("z", "y", "x") +@pytest.mark.skipif( + zarr_version >= version.parse("3.0.0b1"), + reason="Skipping because Zarr version is greater than 3, ZarrTiffStore not yet supported", +) def test_cli_input_to_ngff_image_tifffile(input_images): # noqa: ARG001 input = [ test_data_dir / "input" / "bat-cochlea-volume.tif", diff --git a/test/test_from_ngff_zarr.py b/test/test_from_ngff_zarr.py index 5cf62d7c..6b7e6810 100644 --- a/test/test_from_ngff_zarr.py +++ b/test/test_from_ngff_zarr.py @@ -26,17 +26,22 @@ def test_from_ngff_zarr(input_images): multiscales.chunks = None baseline_name = "from_ngff_zarr" # store_new_multiscales(dataset_name, baseline_name, multiscales) - verify_against_baseline(dataset_name, baseline_name, 
diff --git a/test/test_from_ngff_zarr_tensorstore.py b/test/test_from_ngff_zarr_tensorstore.py
new file mode 100644
index 00000000..0ca086af
--- /dev/null
+++ b/test/test_from_ngff_zarr_tensorstore.py
@@ -0,0 +1,33 @@
+import tempfile
+
+import pytest
+from dask_image import imread
+
+from ngff_zarr import (
+    from_ngff_zarr,
+    to_multiscales,
+    to_ngff_image,
+    to_ngff_zarr,
+)
+
+pytest.importorskip("tensorstore")
+
+
+def test_from_ngff_zarr(input_images):
+    dataset_name = "lung_series"
+    data = imread.imread(input_images[dataset_name])
+    image = to_ngff_image(
+        data=data,
+        dims=("z", "y", "x"),
+        scale={"z": 2.5, "y": 1.40625, "x": 1.40625},
+        translation={"z": 332.5, "y": 360.0, "x": 0.0},
+        name="LIDC2",
+    )
+    multiscales = to_multiscales(image)
+    multiscales.scale_factors = None
+    multiscales.method = None
+    multiscales.chunks = None
+    with tempfile.TemporaryDirectory() as tmpdir:
+        version = "0.4"
+        to_ngff_zarr(tmpdir, multiscales, use_tensorstore=True, version=version)
+        multiscales = from_ngff_zarr(tmpdir, version=version, validate=True)
diff --git a/test/test_large_serialization.py b/test/test_large_serialization.py
index de0c4651..ea58b10a 100644
--- a/test/test_large_serialization.py
+++ b/test/test_large_serialization.py
@@ -19,7 +19,7 @@ def test_large_image_serialization(input_images):
     multiscales = to_multiscales(image)
     # baseline_name = "auto/memory_target_1e6.zarr"
     # store_new_multiscales(dataset_name, baseline_name, multiscales)
-    test_store = MemoryStore(dimension_separator="/")
+    test_store = MemoryStore()
     to_ngff_zarr(test_store, multiscales)
 
     # verify_against_baseline(dataset_name, baseline_name, multiscales)
diff --git a/test/test_memory_usage.py b/test/test_memory_usage.py
index 405f14f9..bed6d6d3 100644
--- a/test/test_memory_usage.py
+++ b/test/test_memory_usage.py
@@ -18,8 +18,9 @@ def test_memory_usage():
     image = to_ngff_image(arr)
     multiscales = to_multiscales(image, scale_factors=[], chunks=2)
     store = zarr.storage.MemoryStore()
-    to_ngff_zarr(store, multiscales)
-    multiscales = from_ngff_zarr(store)
+    version = "0.4"
+    to_ngff_zarr(store, multiscales, version=version)
+    multiscales = from_ngff_zarr(store, version=version)
 
     image = multiscales.images[0]
     arr = image.data
diff --git a/test/test_ngff_validation.py b/test/test_ngff_validation.py
index 7e7e63a2..d2656ae8 100644
--- a/test/test_ngff_validation.py
+++ b/test/test_ngff_validation.py
@@ -7,19 +7,26 @@
     validate,
     from_ngff_zarr,
 )
+from packaging import version
+
+zarr_version = version.parse(zarr.__version__)
+zarr_version_major = zarr_version.major
 
 
 def check_valid_ngff(multiscale: Multiscales):
-    store = zarr.storage.MemoryStore(dimension_separator="/")
-    store = zarr.storage.DirectoryStore("/tmp/test.zarr", dimension_separator="/")
-    to_ngff_zarr(store, multiscale)
-    root = zarr.open_group(store, mode="r")
+    store = zarr.storage.MemoryStore()
+    version = "0.4"
+    to_ngff_zarr(store, multiscale, version=version)
+    format_kwargs = {}
+    if version and zarr_version_major >= 3:
+        format_kwargs = {"zarr_format": 2} if version == "0.4" else {"zarr_format": 3}
+    root = zarr.open_group(store, mode="r", **format_kwargs)
     validate(root.attrs.asdict())
     # Need to add NGFF metadata property
     # validate(ngff, strict=True)
 
-    from_ngff_zarr(store, validate=True)
+    from_ngff_zarr(store, validate=True, version=version)
 
 
 def test_y_x_valid_ngff():
diff --git a/test/test_task_count.py b/test/test_task_count.py
index 13a74af4..63b52a45 100644
--- a/test/test_task_count.py
+++ b/test/test_task_count.py
@@ -18,8 +18,9 @@ def test_memory_usage():
     image = to_ngff_image(arr)
     multiscales = to_multiscales(image, scale_factors=[], chunks=2)
     store = zarr.storage.MemoryStore()
-    to_ngff_zarr(store, multiscales)
-    multiscales = from_ngff_zarr(store)
+    version = "0.4"
+    to_ngff_zarr(store, multiscales, version=version)
+    multiscales = from_ngff_zarr(store, version=version)
 
     image = multiscales.images[0]
     arr = image.data
diff --git a/test/test_to_ngff_zarr_dask_image.py b/test/test_to_ngff_zarr_dask_image.py
index 5662424e..051ecfa4 100644
--- a/test/test_to_ngff_zarr_dask_image.py
+++ b/test/test_to_ngff_zarr_dask_image.py
@@ -13,6 +13,7 @@ def test_gaussian_isotropic_scale_factors(input_images):
 
     baseline_name = "auto/DASK_IMAGE_GAUSSIAN.zarr"
     multiscales = to_multiscales(image, method=Methods.DASK_IMAGE_GAUSSIAN)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
diff --git a/test/test_to_ngff_zarr_itk.py b/test/test_to_ngff_zarr_itk.py
index 981f8b0d..4f5fa101 100644
--- a/test/test_to_ngff_zarr_itk.py
+++ b/test/test_to_ngff_zarr_itk.py
@@ -16,6 +16,7 @@ def test_bin_shrink_isotropic_scale_factors(input_images):
 
     baseline_name = "auto/ITK_BIN_SHRINK.zarr"
     multiscales = to_multiscales(image, method=Methods.ITK_BIN_SHRINK)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
@@ -28,10 +29,12 @@ def test_gaussian_isotropic_scale_factors(input_images):
     image = input_images[dataset_name]
     baseline_name = "2_4/ITK_GAUSSIAN.zarr"
     multiscales = to_multiscales(image, [2, 4], method=Methods.ITK_GAUSSIAN)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
     baseline_name = "auto/ITK_GAUSSIAN.zarr"
     multiscales = to_multiscales(image, method=Methods.ITK_GAUSSIAN)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
diff --git a/test/test_to_ngff_zarr_itkwasm.py b/test/test_to_ngff_zarr_itkwasm.py
index 22dbe1a9..bccea9df 100644
--- a/test/test_to_ngff_zarr_itkwasm.py
+++ b/test/test_to_ngff_zarr_itkwasm.py
@@ -21,6 +21,7 @@ def test_bin_shrink_isotropic_scale_factors(input_images):
         # todo: re-enable this test
         return
     multiscales = to_multiscales(image, [2, 4], method=Methods.ITKWASM_BIN_SHRINK)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
     if _HAVE_CUCIM:
@@ -28,6 +29,7 @@ def test_bin_shrink_isotropic_scale_factors(input_images):
     else:
         baseline_name = "auto/ITKWASM_BIN_SHRINK.zarr"
     multiscales = to_multiscales(image, method=Methods.ITKWASM_BIN_SHRINK)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
@@ -39,6 +41,7 @@ def test_gaussian_isotropic_scale_factors(input_images):
     else:
         baseline_name = "2_4/ITKWASM_GAUSSIAN.zarr"
     multiscales = to_multiscales(image, [2, 4], method=Methods.ITKWASM_GAUSSIAN)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
     if _HAVE_CUCIM:
@@ -46,6 +49,7 @@ def test_gaussian_isotropic_scale_factors(input_images):
     else:
         baseline_name = "auto/ITKWASM_GAUSSIAN.zarr"
     multiscales = to_multiscales(image, method=Methods.ITKWASM_GAUSSIAN)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
     dataset_name = "cthead1"
@@ -55,6 +59,7 @@ def test_gaussian_isotropic_scale_factors(input_images):
     else:
         baseline_name = "2_3/ITKWASM_GAUSSIAN.zarr"
     multiscales = to_multiscales(image, [2, 3], method=Methods.ITKWASM_GAUSSIAN)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
     dataset_name = "MR-head"
@@ -64,6 +69,7 @@ def test_gaussian_isotropic_scale_factors(input_images):
     else:
         baseline_name = "2_3_4/ITKWASM_GAUSSIAN.zarr"
     multiscales = to_multiscales(image, [2, 3, 4], method=Methods.ITKWASM_GAUSSIAN)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
     verify_against_baseline(dataset_name, baseline_name, multiscales)
 
@@ -92,13 +98,16 @@ def test_label_image_isotropic_scale_factors(input_images):
     image = input_images[dataset_name]
     baseline_name = "2_4/ITKWASM_LABEL_IMAGE.zarr"
     multiscales = to_multiscales(image, [2, 4], method=Methods.ITKWASM_LABEL_IMAGE)
-    verify_against_baseline(dataset_name, baseline_name, multiscales)
+    version = "0.4"
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
+    verify_against_baseline(dataset_name, baseline_name, multiscales, version=version)
 
     dataset_name = "2th_cthead1"
     image = input_images[dataset_name]
     baseline_name = "2_3/ITKWASM_LABEL_IMAGE.zarr"
     multiscales = to_multiscales(image, [2, 3], method=Methods.ITKWASM_LABEL_IMAGE)
-    verify_against_baseline(dataset_name, baseline_name, multiscales)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales)
+    verify_against_baseline(dataset_name, baseline_name, multiscales, version=version)
 
 
 # def test_label_image_anisotropic_scale_factors(input_images):
diff --git a/test/test_to_ngff_zarr_kvikio.py b/test/test_to_ngff_zarr_kvikio.py
index bbdae195..d7ae7fb1 100644
--- a/test/test_to_ngff_zarr_kvikio.py
+++ b/test/test_to_ngff_zarr_kvikio.py
@@ -1,5 +1,6 @@
 import pytest
 from ngff_zarr import Methods, to_multiscales, to_ngff_zarr
+from ngff_zarr._zarr_kwargs import zarr_kwargs
 
 pytest.importorskip("kvikio")
 pytest.importorskip("itkwasm_downsample_cucim")
@@ -13,7 +14,7 @@ def test_bin_shrink_isotropic_scale_factors(input_images, tmp_path):
 
     from kvikio.zarr import GDSStore
 
-    store = GDSStore(tmp_path / baseline_name, dimension_separator="/")
+    store = GDSStore(tmp_path / baseline_name, **zarr_kwargs)
 
     from kvikio.nvcomp_codec import NvCompBatchCodec
 
     compressor = NvCompBatchCodec("lz4")
@@ -28,7 +29,7 @@ def test_gaussian_isotropic_scale_factors(input_images, tmp_path):
 
     from kvikio.zarr import GDSStore
 
-    store = GDSStore(tmp_path / baseline_name, dimension_separator="/")
+    store = GDSStore(tmp_path / baseline_name, **zarr_kwargs)
 
     from kvikio.nvcomp_codec import NvCompBatchCodec
 
     compressor = NvCompBatchCodec("zstd")
diff --git a/test/test_to_ngff_zarr_rfc2_zarr_v3.py b/test/test_to_ngff_zarr_rfc2_zarr_v3.py
new file mode 100644
index 00000000..bb64a34e
--- /dev/null
+++ b/test/test_to_ngff_zarr_rfc2_zarr_v3.py
@@ -0,0 +1,31 @@
+from packaging import version
+
+import pytest
+
+import zarr.storage
+import zarr
+
+from ngff_zarr import Methods, to_multiscales, to_ngff_zarr, from_ngff_zarr
+
+from ._data import verify_against_baseline
+
+zarr_version = version.parse(zarr.__version__)
+
+# Skip all tests in this module if Zarr Python is older than 3.0.0b1
+pytestmark = pytest.mark.skipif(
+    zarr_version < version.parse("3.0.0b1"), reason="zarr version < 3.0.0b1"
+)
+
+
+def test_gaussian_isotropic_scale_factors(input_images):
+    dataset_name = "cthead1"
+    image = input_images[dataset_name]
+    baseline_name = "2_4/RFC3_GAUSSIAN.zarr"
+    multiscales = to_multiscales(image, [2, 4], method=Methods.ITKWASM_GAUSSIAN)
+    store = zarr.storage.MemoryStore()
+
+    version = "0.5"
+    to_ngff_zarr(store, multiscales, version=version)
+    multiscales = from_ngff_zarr(store, version=version)
+    # store_new_multiscales(dataset_name, baseline_name, multiscales, version=version)
+    verify_against_baseline(dataset_name, baseline_name, multiscales, version=version)
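The new test exercises the RFC 2 layout end to end. To inspect what actually lands in the store, the root group attributes can be read back directly; a sketch assuming Zarr Python 3 is installed, with illustrative array contents:

```python
import numpy as np
import zarr

from ngff_zarr import to_multiscales, to_ngff_image, to_ngff_zarr

store = zarr.storage.MemoryStore()
image = to_ngff_image(np.zeros((32, 32), dtype=np.uint8), dims=("y", "x"))
to_ngff_zarr(store, to_multiscales(image), version="0.5")

# RFC 2: a Zarr format 3 group whose attributes carry the "ome" object.
root = zarr.open_group(store, mode="r", zarr_format=3)
ome = root.attrs["ome"]
assert ome["version"] == "0.5"
assert ome["multiscales"][0]["axes"][0]["name"] == "y"
```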