From ed56505d015444d349c3f47be635089d3edb50c8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Jul 2025 16:13:25 +0200 Subject: [PATCH 01/16] add parse_dtype as ergonomic replacement for parse_data_type, handle more JSON-like inputs, and test for round-trips --- src/zarr/core/array.py | 6 +-- src/zarr/core/dtype/__init__.py | 66 ++++++++++++++++++++++-- src/zarr/dtype.py | 2 + tests/test_array.py | 4 +- tests/test_dtype_registry.py | 42 ++++++--------- tests/test_metadata/test_consolidated.py | 4 +- 6 files changed, 87 insertions(+), 37 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index a0b8e9e7dd..7698e7a59d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -73,7 +73,7 @@ VariableLengthUTF8, ZDType, ZDTypeLike, - parse_data_type, + parse_dtype, ) from zarr.core.dtype.common import HasEndianness, HasItemSize, HasObjectCodec from zarr.core.indexing import ( @@ -618,7 +618,7 @@ async def _create( Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. 
""" - dtype_parsed = parse_data_type(dtype, zarr_format=zarr_format) + dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) store_path = await make_store_path(store) shape = parse_shapelike(shape) @@ -4239,7 +4239,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - zdtype = parse_data_type(dtype, zarr_format=zarr_format) + zdtype = parse_dtype(dtype, zarr_format=zarr_format) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 1d36689ec8..959b0199af 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import Sequence from typing import TYPE_CHECKING, Final, TypeAlias from zarr.core.dtype.common import ( @@ -94,6 +95,7 @@ "ZDType", "data_type_registry", "parse_data_type", + "parse_dtype", ] data_type_registry = DataTypeRegistry() @@ -188,13 +190,69 @@ def parse_data_type( zarr_format: ZarrFormat, ) -> ZDType[TBaseDType, TBaseScalar]: """ - Interpret the input as a ZDType instance. + Interpret the input as a ZDType. + + This function wraps ``parse_dtype``. The only difference is the function name. This function may + be deprecated in a future version of Zarr Python in favor of ``parse_dtype``. + + Parameters + ---------- + dtype_spec : ZDTypeLike + The input to be interpreted as a ZDType. This could be a ZDType, which will be returned + directly, or a JSON representation of a ZDType, or a native dtype, or a python object that + can be converted into a native dtype. + zarr_format : ZarrFormat + The Zarr format version. + + Returns + ------- + ZDType[TBaseDType, TBaseScalar] + The ZDType corresponding to the input. 
+ + Examples + -------- + >>> parse_dtype("int32", zarr_format=2) + Int32(endianness="little") + """ + return parse_dtype(dtype_spec, zarr_format=zarr_format) + + +def parse_dtype( + dtype_spec: ZDTypeLike, + *, + zarr_format: ZarrFormat, +) -> ZDType[TBaseDType, TBaseScalar]: + """ + Interpret the input as a ZDType. + + Parameters + ---------- + dtype_spec : ZDTypeLike + The input to be interpreted as a ZDType. This could be a ZDType, which will be returned + directly, or a JSON representation of a ZDType, or a native dtype, or a python object that + can be converted into a native dtype. + zarr_format : ZarrFormat + The Zarr format version. + + Returns + ------- + ZDType[TBaseDType, TBaseScalar] + The ZDType corresponding to the input. + + Examples + -------- + >>> parse_dtype("int32", zarr_format=2) + Int32(endianness="little") """ if isinstance(dtype_spec, ZDType): return dtype_spec - # dict and zarr_format 3 means that we have a JSON object representation of the dtype - if zarr_format == 3 and isinstance(dtype_spec, Mapping): - return get_data_type_from_json(dtype_spec, zarr_format=3) + # First attempt to interpret the input as JSON + if isinstance(dtype_spec, Mapping | str | Sequence): + try: + return get_data_type_from_json(dtype_spec, zarr_format=3) # type: ignore[arg-type] + except ValueError: + # no data type matched this JSON-like input + pass if dtype_spec in VLEN_UTF8_ALIAS: # If the dtype request is one of the aliases for variable-length UTF-8 strings, # return that dtype. 
diff --git a/src/zarr/dtype.py b/src/zarr/dtype.py index 79f3aa3a0f..844d38fe0e 100644 --- a/src/zarr/dtype.py +++ b/src/zarr/dtype.py @@ -39,6 +39,7 @@ ZDType, data_type_registry, parse_data_type, + parse_dtype, ) __all__ = [ @@ -84,4 +85,5 @@ "data_type_registry", "data_type_registry", "parse_data_type", + "parse_dtype", ] diff --git a/tests/test_array.py b/tests/test_array.py index c4201b4548..92209aff1a 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -53,7 +53,7 @@ VariableLengthBytes, VariableLengthUTF8, ZDType, - parse_data_type, + parse_dtype, ) from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str @@ -1308,7 +1308,7 @@ async def test_v2_chunk_encoding( filters=filters, ) filters_expected, compressor_expected = _parse_chunk_encoding_v2( - filters=filters, compressor=compressors, dtype=parse_data_type(dtype, zarr_format=2) + filters=filters, compressor=compressors, dtype=parse_dtype(dtype, zarr_format=2) ) assert arr.metadata.zarr_format == 2 # guard for mypy assert arr.metadata.compressor == compressor_expected diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index 95ede9e1d7..f2394e2083 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -15,17 +15,13 @@ AnyDType, Bool, DataTypeRegistry, - DateTime64, FixedLengthUTF32, - Int8, - Int16, TBaseDType, TBaseScalar, - VariableLengthUTF8, ZDType, data_type_registry, get_data_type_from_json, - parse_data_type, + parse_dtype, ) if TYPE_CHECKING: @@ -174,28 +170,22 @@ def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: data_type_registry.unregister(TestDataType._zarr_v3_name) -@pytest.mark.parametrize( - ("dtype_params", "expected", "zarr_format"), - [ - ("str", VariableLengthUTF8(), 2), - ("str", VariableLengthUTF8(), 3), - ("int8", Int8(), 3), - (Int8(), Int8(), 3), - (">i2", Int16(endianness="big"), 2), - ("datetime64[10s]", DateTime64(unit="s", 
scale_factor=10), 2), - ( - {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, - DateTime64(unit="s", scale_factor=10), - 3, - ), - ], -) -def test_parse_data_type( - dtype_params: Any, expected: ZDType[Any, Any], zarr_format: ZarrFormat -) -> None: +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") +@pytest.mark.parametrize("data_type", zdtype_examples, ids=str) +def test_parse_data_type(data_type: ZDType[Any, Any], zarr_format: ZarrFormat) -> None: """ Test that parse_data_type accepts alternative representations of ZDType instances, and resolves those inputs to the expected ZDType instance. """ - observed = parse_data_type(dtype_params, zarr_format=zarr_format) - assert observed == expected + dtype_spec: Any + if zarr_format == 2: + dtype_spec = data_type.to_json(zarr_format=zarr_format)["name"] + else: + dtype_spec = data_type.to_json(zarr_format=zarr_format) + if dtype_spec == "|O": + msg = "Zarr data type resolution from object failed." 
+ with pytest.raises(ValueError, match=msg): + parse_dtype(dtype_spec, zarr_format=zarr_format) + else: + observed = parse_dtype(dtype_spec, zarr_format=zarr_format) # type: ignore[arg-type] + assert observed == data_type diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index 395e036db2..ea2f834bb6 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -18,7 +18,7 @@ open_consolidated, ) from zarr.core.buffer import cpu, default_buffer_prototype -from zarr.core.dtype import parse_data_type +from zarr.core.dtype import parse_dtype from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV3Metadata from zarr.core.metadata.v2 import ArrayV2Metadata @@ -504,7 +504,7 @@ async def test_consolidated_metadata_backwards_compatibility( async def test_consolidated_metadata_v2(self): store = zarr.storage.MemoryStore() g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2) - dtype = parse_data_type("uint8", zarr_format=2) + dtype = parse_dtype("uint8", zarr_format=2) await g.create_array(name="a", shape=(1,), attributes={"key": "a"}, dtype=dtype) g1 = await g.create_group(name="g1", attributes={"key": "g1"}) await g1.create_group(name="g2", attributes={"key": "g2"}) From d04557d8e2b67e9b29064ccfe0cb998ed1347689 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Jul 2025 16:20:03 +0200 Subject: [PATCH 02/16] update docs --- docs/user-guide/data_types.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index d4b49ca43f..a968cc4c86 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -412,7 +412,7 @@ attempt data type resolution against *every* data type class, and if, for some r type matches multiple Zarr data types, we treat this as an error and raise an exception. 
If you have a NumPy data type and you want to get the corresponding ``ZDType`` instance, you can use -the ``parse_data_type`` function, which will use the dynamic resolution described above. ``parse_data_type`` +the ``parse_dtype`` function, which will use the dynamic resolution described above. ``parse_dtype`` handles a range of input types: - NumPy data types: @@ -420,9 +420,9 @@ handles a range of input types: .. code-block:: python >>> import numpy as np - >>> from zarr.dtype import parse_data_type + >>> from zarr.dtype import parse_dtype >>> my_dtype = np.dtype('>M8[10s]') - >>> parse_data_type(my_dtype, zarr_format=2) + >>> parse_dtype(my_dtype, zarr_format=2) DateTime64(endianness='big', scale_factor=10, unit='s') @@ -431,7 +431,7 @@ handles a range of input types: .. code-block:: python >>> dtype_str = '>M8[10s]' - >>> parse_data_type(dtype_str, zarr_format=2) + >>> parse_dtype(dtype_str, zarr_format=2) DateTime64(endianness='big', scale_factor=10, unit='s') - ``ZDType`` instances: @@ -440,7 +440,7 @@ handles a range of input types: >>> from zarr.dtype import DateTime64 >>> zdt = DateTime64(endianness='big', scale_factor=10, unit='s') - >>> parse_data_type(zdt, zarr_format=2) # Use a ZDType (this is a no-op) + >>> parse_dtype(zdt, zarr_format=2) # Use a ZDType (this is a no-op) DateTime64(endianness='big', scale_factor=10, unit='s') - Python dictionaries (requires ``zarr_format=3``). These dictionaries must be consistent with the @@ -449,7 +449,7 @@ handles a range of input types: .. 
code-block:: python >>> dt_dict = {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}} - >>> parse_data_type(dt_dict, zarr_format=3) + >>> parse_dtype(dt_dict, zarr_format=3) DateTime64(endianness='little', scale_factor=10, unit='s') - >>> parse_data_type(dt_dict, zarr_format=3).to_json(zarr_format=3) + >>> parse_dtype(dt_dict, zarr_format=3).to_json(zarr_format=3) {'name': 'numpy.datetime64', 'configuration': {'unit': 's', 'scale_factor': 10}} From 60099232332802cd4c7962a66f21be0352141e74 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Jul 2025 16:22:44 +0200 Subject: [PATCH 03/16] changelog --- changes/3264.fix.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changes/3264.fix.rst diff --git a/changes/3264.fix.rst b/changes/3264.fix.rst new file mode 100644 index 0000000000..efcbab514e --- /dev/null +++ b/changes/3264.fix.rst @@ -0,0 +1,4 @@ +- Expand the range of types accepted by ``parse_data_type`` to include strings and Sequences. +- Move the functionality of ``parse_data_type`` to a new function called ``parse_dtype``. This change + ensures that nomenclature is consistent across the codebase. ``parse_data_type`` remains, so this + change is not breaking. 
\ No newline at end of file From 082ad49cd1f4e0ddca58888b5ba2519efda6d7ee Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Jul 2025 16:38:41 +0200 Subject: [PATCH 04/16] remove type: ignore --- tests/test_dtype_registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index f2394e2083..81b63c9dbb 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -187,5 +187,5 @@ def test_parse_data_type(data_type: ZDType[Any, Any], zarr_format: ZarrFormat) - with pytest.raises(ValueError, match=msg): parse_dtype(dtype_spec, zarr_format=zarr_format) else: - observed = parse_dtype(dtype_spec, zarr_format=zarr_format) # type: ignore[arg-type] + observed = parse_dtype(dtype_spec, zarr_format=zarr_format) assert observed == data_type From d684ada2b95f9d0b9fe2bb8c26a49cda31c432d3 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Jul 2025 17:54:17 +0200 Subject: [PATCH 05/16] add test to check that parse_dtype is parse_data_type --- tests/test_dtype_registry.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index 81b63c9dbb..90f4ae736c 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -21,6 +21,7 @@ ZDType, data_type_registry, get_data_type_from_json, + parse_data_type, parse_dtype, ) @@ -174,7 +175,7 @@ def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: @pytest.mark.parametrize("data_type", zdtype_examples, ids=str) def test_parse_data_type(data_type: ZDType[Any, Any], zarr_format: ZarrFormat) -> None: """ - Test that parse_data_type accepts alternative representations of ZDType instances, and resolves + Test that parse_dtype accepts alternative representations of ZDType instances, and resolves those inputs to the expected ZDType instance. 
""" dtype_spec: Any @@ -189,3 +190,26 @@ def test_parse_data_type(data_type: ZDType[Any, Any], zarr_format: ZarrFormat) - else: observed = parse_dtype(dtype_spec, zarr_format=zarr_format) assert observed == data_type + + +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") +@pytest.mark.parametrize("data_type", zdtype_examples, ids=str) +def test_parse_data_type_funcs(data_type: ZDType[Any, Any], zarr_format: ZarrFormat) -> None: + """ + Test that parse_data_type generates the same output as parse_dtype. + """ + dtype_spec: Any + if zarr_format == 2: + dtype_spec = data_type.to_json(zarr_format=zarr_format)["name"] + else: + dtype_spec = data_type.to_json(zarr_format=zarr_format) + if dtype_spec == "|O": + msg = "Zarr data type resolution from object failed." + with pytest.raises(ValueError, match=msg): + parse_dtype(dtype_spec, zarr_format=zarr_format) + with pytest.raises(ValueError, match=msg): + parse_data_type(dtype_spec, zarr_format=zarr_format) + else: + assert parse_dtype(dtype_spec, zarr_format=zarr_format) == parse_data_type( + dtype_spec, zarr_format=zarr_format + ) From 5feb937e5fbd5522328976afe14471dd27a3ae0d Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 22 Jul 2025 11:29:10 +0200 Subject: [PATCH 06/16] Update src/zarr/dtype.py Co-authored-by: David Stansby --- src/zarr/dtype.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/dtype.py b/src/zarr/dtype.py index 844d38fe0e..2678a5fd3f 100644 --- a/src/zarr/dtype.py +++ b/src/zarr/dtype.py @@ -84,6 +84,5 @@ "ZDType", "data_type_registry", "data_type_registry", - "parse_data_type", "parse_dtype", ] From c42edf6049b8c8b62d6ccf347ea4aeda0c76e7bf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Jul 2025 11:35:33 +0200 Subject: [PATCH 07/16] fix docstring --- src/zarr/core/dtype/__init__.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py 
index 959b0199af..6d883341a5 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -232,7 +232,9 @@ def parse_dtype( directly, or a JSON representation of a ZDType, or a native dtype, or a python object that can be converted into a native dtype. zarr_format : ZarrFormat - The Zarr format version. + The Zarr format version. This parameter is required because this function will attempt to + parse the JSON representation of a data type, and the JSON representation of data types + is different between Zarr 2 and Zarr 3. Returns ------- @@ -241,6 +243,14 @@ def parse_dtype( Examples -------- + >>> from zarr.dtype import parse_dtype + >>> import numpy as np + >>> parse_dtype("int32", zarr_format=2) + Int32(endianness='little') + >>> parse_dtype(np.dtype('S10'), zarr_format=2) + NullTerminatedBytes(length=10) + >>> parse_dtype({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) + DateTime64(endianness='little', scale_factor=10, unit='s') >>> parse_dtype("int32", zarr_format=2) Int32(endianness="little") """ From 6285005e928c48819cd05d49fe213e911f938609 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Jul 2025 12:07:50 +0200 Subject: [PATCH 08/16] support the output of to_json(zarr_format=2) as input to parse_dtype --- src/zarr/core/dtype/__init__.py | 22 +++++++--- tests/test_dtype_registry.py | 72 +++++++++++++++++++-------------- 2 files changed, 58 insertions(+), 36 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 6d883341a5..083b263efb 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -202,7 +202,9 @@ def parse_data_type( directly, or a JSON representation of a ZDType, or a native dtype, or a python object that can be converted into a native dtype. zarr_format : ZarrFormat - The Zarr format version. + The Zarr format version. 
This parameter is required because this function will attempt to + parse the JSON representation of a data type, and the JSON representation of data types + varies between Zarr 2 and Zarr 3. Returns ------- @@ -211,10 +213,18 @@ def parse_data_type( Examples -------- - >>> parse_dtype("int32", zarr_format=2) + >>> from zarr.dtype import parse_data_type + >>> import numpy as np + >>> parse_data_type("int32", zarr_format=2) + Int32(endianness='little') + >>> parse_dtype(np.dtype('S10'), zarr_format=2) + NullTerminatedBytes(length=10) + >>> parse_data_type({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) + DateTime64(endianness='little', scale_factor=10, unit='s') + >>> parse_data_type("int32", zarr_format=2) Int32(endianness="little") """ - return parse_dtype(dtype_spec, zarr_format=zarr_format) + return parse_data_type(dtype_spec, zarr_format=zarr_format) def parse_dtype( @@ -234,7 +244,7 @@ def parse_dtype( zarr_format : ZarrFormat The Zarr format version. This parameter is required because this function will attempt to parse the JSON representation of a data type, and the JSON representation of data types - is different between Zarr 2 and Zarr 3. + varies between Zarr 2 and Zarr 3. Returns ------- @@ -259,7 +269,7 @@ def parse_dtype( # First attempt to interpret the input as JSON if isinstance(dtype_spec, Mapping | str | Sequence): try: - return get_data_type_from_json(dtype_spec, zarr_format=3) # type: ignore[arg-type] + return get_data_type_from_json(dtype_spec, zarr_format=zarr_format) # type: ignore[arg-type] except ValueError: # no data type matched this JSON-like input pass @@ -268,5 +278,5 @@ def parse_dtype( # return that dtype. 
return VariableLengthUTF8() # type: ignore[return-value] # otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case - # we can create a numpy dtype from it, and do the dtype inference from that + # we can create a native dtype from it, and do the dtype inference from that return get_data_type_from_native_dtype(dtype_spec) # type: ignore[arg-type] diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index 90f4ae736c..2716665ff0 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -3,7 +3,7 @@ import re import sys from pathlib import Path -from typing import TYPE_CHECKING, Any, get_args +from typing import TYPE_CHECKING, Any, Literal, get_args import numpy as np import pytest @@ -24,6 +24,7 @@ parse_data_type, parse_dtype, ) +from zarr.core.dtype.common import unpack_dtype_json if TYPE_CHECKING: from collections.abc import Generator @@ -173,43 +174,54 @@ def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("data_type", zdtype_examples, ids=str) -def test_parse_data_type(data_type: ZDType[Any, Any], zarr_format: ZarrFormat) -> None: +@pytest.mark.parametrize("json_style", [(2, "internal"), (2, "metadata"), (3, None)], ids=str) +@pytest.mark.parametrize( + "dtype_parser_func", [parse_dtype, parse_data_type], ids=["parse_dtype", "parse_data_type"] +) +def test_parse_data_type( + data_type: ZDType[Any, Any], + json_style: tuple[ZarrFormat, None | Literal["internal", "metadata"]], + dtype_parser_func: Any, +) -> None: """ - Test that parse_dtype accepts alternative representations of ZDType instances, and resolves - those inputs to the expected ZDType instance. + Test the parsing of data types into ZDType instances. 
+ + This function tests the ability of `dtype_parser_func` to correctly + interpret and parse data type specifications into `ZDType` instances + according to the specified Zarr format and JSON style. + + Parameters + ---------- + data_type : ZDType[Any, Any] + The data type to be tested for parsing. + json_style : tuple[ZarrFormat, None or Literal["internal", "metadata"]] + A tuple specifying the Zarr format version and the JSON style + for Zarr V2. For Zarr V2 there are 2 JSON styles: "internal", and + "metadata". The internal style takes the form {"name": <data type identifier>, "object_codec_id": <object codec id>}, + while the metadata style is just <data type identifier>. + dtype_parser_func : Any + The function to be tested for parsing the data type. This is necessary for compatibility + reasons, as we support multiple functions that perform the same data type parsing operation. """ + zarr_format, style = json_style dtype_spec: Any - if zarr_format == 2: - dtype_spec = data_type.to_json(zarr_format=zarr_format)["name"] - else: - dtype_spec = data_type.to_json(zarr_format=zarr_format) - if dtype_spec == "|O": - msg = "Zarr data type resolution from object failed." - with pytest.raises(ValueError, match=msg): - parse_dtype(dtype_spec, zarr_format=zarr_format) - else: - observed = parse_dtype(dtype_spec, zarr_format=zarr_format) - assert observed == data_type - -@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") -@pytest.mark.parametrize("data_type", zdtype_examples, ids=str) -def test_parse_data_type_funcs(data_type: ZDType[Any, Any], zarr_format: ZarrFormat) -> None: - """ - Test that parse_data_type generates the same output as parse_dtype. 
- """ - dtype_spec: Any if zarr_format == 2: - dtype_spec = data_type.to_json(zarr_format=zarr_format)["name"] + dtype_spec = data_type.to_json(zarr_format=zarr_format) + if style == "internal": + pass + elif style == "metadata": + dtype_spec = unpack_dtype_json(dtype_spec) + else: + raise ValueError(f"Invalid zarr v2 json style: {style}") else: dtype_spec = data_type.to_json(zarr_format=zarr_format) + if dtype_spec == "|O": + # The object data type on its own is ambiguous and should fail to resolve. msg = "Zarr data type resolution from object failed." with pytest.raises(ValueError, match=msg): - parse_dtype(dtype_spec, zarr_format=zarr_format) - with pytest.raises(ValueError, match=msg): - parse_data_type(dtype_spec, zarr_format=zarr_format) + dtype_parser_func(dtype_spec, zarr_format=zarr_format) else: - assert parse_dtype(dtype_spec, zarr_format=zarr_format) == parse_data_type( - dtype_spec, zarr_format=zarr_format - ) + observed = dtype_parser_func(dtype_spec, zarr_format=zarr_format) + assert observed == data_type From b95f3be95a4b0799de8da75ea7b09e18b0f7d37a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Jul 2025 12:09:26 +0200 Subject: [PATCH 09/16] lint --- src/zarr/dtype.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/dtype.py b/src/zarr/dtype.py index 2678a5fd3f..80505ba2f7 100644 --- a/src/zarr/dtype.py +++ b/src/zarr/dtype.py @@ -38,7 +38,6 @@ VariableLengthUTF8JSON_V2, ZDType, data_type_registry, - parse_data_type, parse_dtype, ) From b0edab69d2b4c62a6e7facfeb6bd7ead2f4ef2f9 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 22 Jul 2025 12:17:21 +0200 Subject: [PATCH 10/16] remove infinite recursion --- src/zarr/core/dtype/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 083b263efb..ae50528fd5 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -224,7 +224,7 @@ def 
parse_data_type( >>> parse_data_type("int32", zarr_format=2) Int32(endianness="little") """ - return parse_data_type(dtype_spec, zarr_format=zarr_format) + return parse_dtype(dtype_spec, zarr_format=zarr_format) def parse_dtype( From 385efa015638429bedaa3f9066eba69dc6333d05 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 22 Jul 2025 13:22:54 +0200 Subject: [PATCH 11/16] Update src/zarr/core/dtype/__init__.py Co-authored-by: David Stansby --- src/zarr/core/dtype/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index ae50528fd5..7433cc90bf 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -217,7 +217,7 @@ def parse_data_type( >>> import numpy as np >>> parse_data_type("int32", zarr_format=2) Int32(endianness='little') - >>> parse_dtype(np.dtype('S10'), zarr_format=2) + >>> parse_data_type(np.dtype('S10'), zarr_format=2) NullTerminatedBytes(length=10) >>> parse_data_type({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) DateTime64(endianness='little', scale_factor=10, unit='s') From f7b5387db8f80c766e35a09c72ec5f8f2b07dc70 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 22 Jul 2025 13:23:10 +0200 Subject: [PATCH 12/16] Update src/zarr/core/dtype/__init__.py Co-authored-by: David Stansby --- src/zarr/core/dtype/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 7433cc90bf..0bb55dbe69 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -238,7 +238,7 @@ def parse_dtype( Parameters ---------- dtype_spec : ZDTypeLike - The input to be interpreted as a ZDType. This could be a ZDType, which will be returned + The input to be converted to a ZDType. 
This could be a ZDType, which will be returned directly, or a JSON representation of a ZDType, or a native dtype, or a python object that can be converted into a native dtype. zarr_format : ZarrFormat From 6cef7d782cb27a6c05f4953461bf61e8c35f5531 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 22 Jul 2025 13:23:20 +0200 Subject: [PATCH 13/16] Update src/zarr/core/dtype/__init__.py Co-authored-by: David Stansby --- src/zarr/core/dtype/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 0bb55dbe69..f956db467b 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -221,8 +221,6 @@ def parse_data_type( NullTerminatedBytes(length=10) >>> parse_data_type({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) DateTime64(endianness='little', scale_factor=10, unit='s') - >>> parse_data_type("int32", zarr_format=2) - Int32(endianness="little") """ return parse_dtype(dtype_spec, zarr_format=zarr_format) From 9a7d1db496dfa5d03312cb0b7be9ceed40a943f2 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 22 Jul 2025 13:23:30 +0200 Subject: [PATCH 14/16] Update src/zarr/core/dtype/__init__.py Co-authored-by: David Stansby --- src/zarr/core/dtype/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index f956db467b..e7ba37473c 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -259,8 +259,6 @@ def parse_dtype( NullTerminatedBytes(length=10) >>> parse_dtype({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) DateTime64(endianness='little', scale_factor=10, unit='s') - >>> parse_dtype("int32", zarr_format=2) - Int32(endianness="little") """ if isinstance(dtype_spec, ZDType): return dtype_spec From d741513df471103d9cc16633b4a2346016ba7576 Mon Sep 17 00:00:00 2001 
From: Davis Bennett Date: Tue, 22 Jul 2025 13:23:38 +0200 Subject: [PATCH 15/16] Update src/zarr/core/dtype/__init__.py Co-authored-by: David Stansby --- src/zarr/core/dtype/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index e7ba37473c..a2917df65f 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -231,7 +231,7 @@ def parse_dtype( zarr_format: ZarrFormat, ) -> ZDType[TBaseDType, TBaseScalar]: """ - Interpret the input as a ZDType. + Convert the input to a ZDType. Parameters ---------- From 8585158ef1a26a72a70f2d1626e81ee123b041d9 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 22 Jul 2025 13:27:29 +0200 Subject: [PATCH 16/16] Update src/zarr/core/dtype/__init__.py Co-authored-by: David Stansby --- src/zarr/core/dtype/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index a2917df65f..bf09a7501e 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -237,7 +237,7 @@ def parse_dtype( ---------- dtype_spec : ZDTypeLike The input to be converted to a ZDType. This could be a ZDType, which will be returned - directly, or a JSON representation of a ZDType, or a native dtype, or a python object that + directly, or a JSON representation of a ZDType, or a numpy dtype, or a python object that can be converted into a native dtype. zarr_format : ZarrFormat The Zarr format version. This parameter is required because this function will attempt to