-
Notifications
You must be signed in to change notification settings - Fork 33
Open
Description
My understanding is that there are five possible JSON declarations for the bytes codec:
1. "bytes"
2. {"name": "bytes"}
3. {"name": "bytes", "configuration": {}}
4. {"name": "bytes", "configuration": {"endian": "little"}}
5. {"name": "bytes", "configuration": {"endian": "big"}}
First, am I correct in this census or is there a detail from the spec I am missing?
Second, unless I'm misreading the spec, we have defined 3 different ways of saying the exact same thing (items 1-3). Putting aside the merits of this design, I'm curious what the different zarr v3 implementations support. cc @LDeakin @jbms
Right now, zarr-python only supports options 2-5. See the script and results below.
demo script
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "zarr @ git+https://github.com/zarr-developers/zarr-python.git@27615fd",
# "pytest"
# ]
# ///
from typing import Any
import zarr
import pytest
from zarr.core.metadata.v3 import ArrayV3Metadata
# Candidate JSON declarations of the "bytes" codec used to parametrize the test:
# the bare string, a name-only object, an object with an empty configuration,
# and one object per explicit endianness.
bytes_codec_specs = [
    "bytes",
    {"name": "bytes"},
    {"name": "bytes", "configuration": {}},
    *(
        {"name": "bytes", "configuration": {"endian": byte_order}}
        for byte_order in ("little", "big")
    ),
]
@pytest.mark.parametrize(
    'bytes_codec_spec',
    bytes_codec_specs,
    ids=list(map(str, bytes_codec_specs)),
)
def test(bytes_codec_spec: str | dict[str, Any]) -> None:
    """Parse a v3 array metadata document whose only codec is ``bytes_codec_spec``.

    There is no explicit assertion: a case passes when
    ``ArrayV3Metadata.from_dict`` returns without raising.
    """
    chunk_grid = {"name": "regular", "configuration": {"chunk_shape": [10, 10, 3]}}
    chunk_key_encoding = {"name": "default", "configuration": {"separator": "/"}}
    array_document = {
        "node_type": "array",
        "fill_value": 0,
        "zarr_format": 3,
        "shape": [100, 100, 3],
        "data_type": "uint8",
        "chunk_grid": chunk_grid,
        "chunk_key_encoding": chunk_key_encoding,
        # The codec declaration under test is the sole entry in the pipeline.
        "codecs": [bytes_codec_spec],
    }
    ArrayV3Metadata.from_dict(array_document)
if __name__ == "__main__":
    # Run this file's tests, using this same file as the pytest config file.
    # "-c" and its value must be separate argv entries: as the single token
    # f'-c {__file__}', the option value is parsed as " <file>" (with a leading
    # space), which pytest resolves against the current directory and reports
    # as a bogus rootdir such as "<cwd>/ <cwd>".
    pytest.main([__file__, "-c", __file__])
test.py3.11-1.25-minimalbennettd@dvb-desktop-0 ➜ zarr-python git:(feat/numcodecs-compat) ✗ uv run test.py
Reading inline script metadata from `test.py`
Updated https://github.com/zarr-developers/zarr-python.git (27615fd0)
Cannot read termcap database;
using dumb terminal settings.
============================================================================================================================================= test session starts =============================================================================================================================================
platform linux -- Python 3.11.5, pytest-8.4.1, pluggy-1.6.0
rootdir: /home/bennettd/dev/zarr-python/ /home/bennettd/dev/zarr-python
configfile: test.py
collected 5 items
/home/bennettd/dev/zarr-python F.... [100%]
================================================================================================================================================== FAILURES ===================================================================================================================================================
_________________________________________________________________________________________________________________________________________________ test[bytes] _________________________________________________________________________________________________________________________________________________
bytes_codec_spec = 'bytes'
@pytest.mark.parametrize('bytes_codec_spec', bytes_codec_specs, ids=[str(x) for x in bytes_codec_specs])
def test(bytes_codec_spec: str | dict[str, Any]) -> None:
data = {
"node_type": "array",
"fill_value": 0,
"zarr_format": 3,
"shape": [100, 100, 3],
"data_type": "uint8",
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": [10, 10, 3]}},
"chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}},
"codecs": [bytes_codec_spec]
}
> ArrayV3Metadata.from_dict(data)
test.py:34:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../.cache/uv/archive-v0/DL3T_HZF_GHlo1WkQHjiQ/lib/python3.11/site-packages/zarr/core/metadata/v3.py:323: in from_dict
return cls(**_data, fill_value=fill_value_parsed, data_type=data_type) # type: ignore[arg-type]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
../../.cache/uv/archive-v0/DL3T_HZF_GHlo1WkQHjiQ/lib/python3.11/site-packages/zarr/core/metadata/v3.py:177: in __init__
codecs_parsed_partial = parse_codecs(codecs)
^^^^^^^^^^^^^^^^^^^^
../../.cache/uv/archive-v0/DL3T_HZF_GHlo1WkQHjiQ/lib/python3.11/site-packages/zarr/core/metadata/v3.py:66: in parse_codecs
name_parsed, _ = parse_named_configuration(c, require_configuration=False)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
data = 'bytes', expected_name = None
def parse_named_configuration(
data: JSON, expected_name: str | None = None, *, require_configuration: bool = True
) -> tuple[str, JSON | None]:
if not isinstance(data, dict):
> raise TypeError(f"Expected dict, got {type(data)}")
E TypeError: Expected dict, got <class 'str'>
../../.cache/uv/archive-v0/DL3T_HZF_GHlo1WkQHjiQ/lib/python3.11/site-packages/zarr/core/common.py:130: TypeError
=========================================================================================================================================== short test summary info ===========================================================================================================================================
FAILED /home/bennettd/dev/zarr-python::test[bytes] - TypeError: Expected dict, got <class 'str'>
========================================================================================================================================= 1 failed, 4 passed in 0.08s =========================================================================================================================================
Metadata
Metadata
Assignees
Labels
No labels