Skip to content

Commit a7080a3

Browse files
ianhi and d-v-b authored
fix: restore fill_value=None for zarr_format=2 (#3198)
* fix: restore fill_value=None for zarr_format=2 * Fix: Restore Zarr Format 2 default fill value behavior Introduce a new Singleton sentinel class to indicate that the default scalar for the dtype should be used as the fill_value. This allows for preserving the zarr_format 2 behavior of None -> null. For zarr format 3 either the DefaultFillValue or None imply using the dtype default scalar value. * simplify default fill value class * test: add test of None backwards compat * Update tests/test_array.py * Update src/zarr/core/array.py * Update src/zarr/core/array.py * changelog * docstring --------- Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>
1 parent 5a24487 commit a7080a3

File tree

4 files changed

+81
-25
lines changed

4 files changed

+81
-25
lines changed

changes/3198.bugfix.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Restores the ability to create a Zarr V2 array with a ``null`` fill value by introducing a new
2+
class ``DefaultFillValue``, and setting the default value of the ``fill_value`` parameter in array
3+
creation routines to an instance of ``DefaultFillValue``. For Zarr V3 arrays, ``None`` will act as an
4+
alias for a ``DefaultFillValue`` instance, thus preserving compatibility with existing code.

src/zarr/api/synchronous.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.core.array import Array, AsyncArray, CompressorLike
10+
from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
1313
from zarr.core.sync_group import create_hierarchy
@@ -606,7 +606,7 @@ def create(
606606
chunks: ChunkCoords | int | bool | None = None,
607607
dtype: ZDTypeLike | None = None,
608608
compressor: CompressorLike = "auto",
609-
fill_value: Any | None = None, # TODO: need type
609+
fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type
610610
order: MemoryOrder | None = None,
611611
store: str | StoreLike | None = None,
612612
synchronizer: Any | None = None,
@@ -763,7 +763,7 @@ def create_array(
763763
filters: FiltersLike = "auto",
764764
compressors: CompressorsLike = "auto",
765765
serializer: SerializerLike = "auto",
766-
fill_value: Any | None = None,
766+
fill_value: Any | None = DEFAULT_FILL_VALUE,
767767
order: MemoryOrder | None = None,
768768
zarr_format: ZarrFormat | None = 3,
769769
attributes: dict[str, JSON] | None = None,
@@ -929,7 +929,7 @@ def from_array(
929929
filters: FiltersLike | Literal["keep"] = "keep",
930930
compressors: CompressorsLike | Literal["keep"] = "keep",
931931
serializer: SerializerLike | Literal["keep"] = "keep",
932-
fill_value: Any | None = None,
932+
fill_value: Any | None = DEFAULT_FILL_VALUE,
933933
order: MemoryOrder | None = None,
934934
zarr_format: ZarrFormat | None = None,
935935
attributes: dict[str, JSON] | None = None,

src/zarr/core/array.py

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,34 @@
138138

139139

140140
# Array and AsyncArray are defined in the base ``zarr`` namespace
141-
__all__ = ["create_codec_pipeline", "parse_array_metadata"]
141+
__all__ = [
142+
"DEFAULT_FILL_VALUE",
143+
"DefaultFillValue",
144+
"create_codec_pipeline",
145+
"parse_array_metadata",
146+
]
142147

143148
logger = getLogger(__name__)
144149

145150

151+
class DefaultFillValue:
152+
"""
153+
Sentinel class to indicate that the default fill value should be used.
154+
155+
This class exists because conventional values used to convey "defaultness" like ``None`` or
156+
``"auto"`` are ambiguous when specifying the fill value parameter of a Zarr array.
157+
The value ``None`` is ambiguous because it is a valid fill value for Zarr V2
158+
(resulting in ``"fill_value": null`` in array metadata).
159+
A string like ``"auto"`` is ambiguous because such a string is a valid fill value for an array
160+
with a string data type.
161+
An instance of this class lies outside the space of valid fill values, which means it can
162+
unambiguously express that the default fill value should be used.
163+
"""
164+
165+
166+
DEFAULT_FILL_VALUE = DefaultFillValue()
167+
168+
146169
def parse_array_metadata(data: Any) -> ArrayMetadata:
147170
if isinstance(data, ArrayMetadata):
148171
return data
@@ -296,7 +319,7 @@ async def create(
296319
shape: ShapeLike,
297320
dtype: ZDTypeLike,
298321
zarr_format: Literal[2],
299-
fill_value: Any | None = None,
322+
fill_value: Any | None = DEFAULT_FILL_VALUE,
300323
attributes: dict[str, JSON] | None = None,
301324
chunks: ShapeLike | None = None,
302325
dimension_separator: Literal[".", "/"] | None = None,
@@ -320,7 +343,7 @@ async def create(
320343
shape: ShapeLike,
321344
dtype: ZDTypeLike,
322345
zarr_format: Literal[3],
323-
fill_value: Any | None = None,
346+
fill_value: Any | None = DEFAULT_FILL_VALUE,
324347
attributes: dict[str, JSON] | None = None,
325348
# v3 only
326349
chunk_shape: ShapeLike | None = None,
@@ -348,7 +371,7 @@ async def create(
348371
shape: ShapeLike,
349372
dtype: ZDTypeLike,
350373
zarr_format: Literal[3] = 3,
351-
fill_value: Any | None = None,
374+
fill_value: Any | None = DEFAULT_FILL_VALUE,
352375
attributes: dict[str, JSON] | None = None,
353376
# v3 only
354377
chunk_shape: ShapeLike | None = None,
@@ -376,7 +399,7 @@ async def create(
376399
shape: ShapeLike,
377400
dtype: ZDTypeLike,
378401
zarr_format: ZarrFormat,
379-
fill_value: Any | None = None,
402+
fill_value: Any | None = DEFAULT_FILL_VALUE,
380403
attributes: dict[str, JSON] | None = None,
381404
# v3 only
382405
chunk_shape: ShapeLike | None = None,
@@ -411,7 +434,7 @@ async def create(
411434
shape: ShapeLike,
412435
dtype: ZDTypeLike,
413436
zarr_format: ZarrFormat = 3,
414-
fill_value: Any | None = None,
437+
fill_value: Any | None = DEFAULT_FILL_VALUE,
415438
attributes: dict[str, JSON] | None = None,
416439
# v3 only
417440
chunk_shape: ShapeLike | None = None,
@@ -552,7 +575,7 @@ async def _create(
552575
shape: ShapeLike,
553576
dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar],
554577
zarr_format: ZarrFormat = 3,
555-
fill_value: Any | None = None,
578+
fill_value: Any | None = DEFAULT_FILL_VALUE,
556579
attributes: dict[str, JSON] | None = None,
557580
# v3 only
558581
chunk_shape: ShapeLike | None = None,
@@ -673,7 +696,7 @@ def _create_metadata_v3(
673696
shape: ShapeLike,
674697
dtype: ZDType[TBaseDType, TBaseScalar],
675698
chunk_shape: ChunkCoords,
676-
fill_value: Any | None = None,
699+
fill_value: Any | None = DEFAULT_FILL_VALUE,
677700
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
678701
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
679702
dimension_names: DimensionNames = None,
@@ -698,8 +721,9 @@ def _create_metadata_v3(
698721
else:
699722
chunk_key_encoding_parsed = chunk_key_encoding
700723

701-
if fill_value is None:
702-
# v3 spec will not allow a null fill value
724+
if isinstance(fill_value, DefaultFillValue) or fill_value is None:
725+
# Use dtype's default scalar for DefaultFillValue sentinel
726+
# For v3, None is converted to DefaultFillValue behavior
703727
fill_value_parsed = dtype.default_scalar()
704728
else:
705729
fill_value_parsed = fill_value
@@ -725,7 +749,7 @@ async def _create_v3(
725749
dtype: ZDType[TBaseDType, TBaseScalar],
726750
chunk_shape: ChunkCoords,
727751
config: ArrayConfig,
728-
fill_value: Any | None = None,
752+
fill_value: Any | None = DEFAULT_FILL_VALUE,
729753
chunk_key_encoding: (
730754
ChunkKeyEncodingLike
731755
| tuple[Literal["default"], Literal[".", "/"]]
@@ -774,22 +798,28 @@ def _create_metadata_v2(
774798
chunks: ChunkCoords,
775799
order: MemoryOrder,
776800
dimension_separator: Literal[".", "/"] | None = None,
777-
fill_value: float | None = None,
801+
fill_value: Any | None = DEFAULT_FILL_VALUE,
778802
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
779803
compressor: CompressorLikev2 = None,
780804
attributes: dict[str, JSON] | None = None,
781805
) -> ArrayV2Metadata:
782806
if dimension_separator is None:
783807
dimension_separator = "."
784-
if fill_value is None:
785-
fill_value = dtype.default_scalar() # type: ignore[assignment]
808+
809+
# Handle DefaultFillValue sentinel
810+
if isinstance(fill_value, DefaultFillValue):
811+
fill_value_parsed: Any = dtype.default_scalar()
812+
else:
813+
# For v2, preserve None as-is (backward compatibility)
814+
fill_value_parsed = fill_value
815+
786816
return ArrayV2Metadata(
787817
shape=shape,
788818
dtype=dtype,
789819
chunks=chunks,
790820
order=order,
791821
dimension_separator=dimension_separator,
792-
fill_value=fill_value,
822+
fill_value=fill_value_parsed,
793823
compressor=compressor,
794824
filters=filters,
795825
attributes=attributes,
@@ -806,7 +836,7 @@ async def _create_v2(
806836
order: MemoryOrder,
807837
config: ArrayConfig,
808838
dimension_separator: Literal[".", "/"] | None = None,
809-
fill_value: float | None = None,
839+
fill_value: Any | None = DEFAULT_FILL_VALUE,
810840
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
811841
compressor: CompressorLike = "auto",
812842
attributes: dict[str, JSON] | None = None,
@@ -1750,7 +1780,7 @@ def create(
17501780
shape: ChunkCoords,
17511781
dtype: ZDTypeLike,
17521782
zarr_format: ZarrFormat = 3,
1753-
fill_value: Any | None = None,
1783+
fill_value: Any | None = DEFAULT_FILL_VALUE,
17541784
attributes: dict[str, JSON] | None = None,
17551785
# v3 only
17561786
chunk_shape: ChunkCoords | None = None,
@@ -1879,7 +1909,7 @@ def _create(
18791909
shape: ChunkCoords,
18801910
dtype: ZDTypeLike,
18811911
zarr_format: ZarrFormat = 3,
1882-
fill_value: Any | None = None,
1912+
fill_value: Any | None = DEFAULT_FILL_VALUE,
18831913
attributes: dict[str, JSON] | None = None,
18841914
# v3 only
18851915
chunk_shape: ChunkCoords | None = None,
@@ -3836,7 +3866,7 @@ async def from_array(
38363866
filters: FiltersLike | Literal["keep"] = "keep",
38373867
compressors: CompressorsLike | Literal["keep"] = "keep",
38383868
serializer: SerializerLike | Literal["keep"] = "keep",
3839-
fill_value: Any | None = None,
3869+
fill_value: Any | None = DEFAULT_FILL_VALUE,
38403870
order: MemoryOrder | None = None,
38413871
zarr_format: ZarrFormat | None = None,
38423872
attributes: dict[str, JSON] | None = None,
@@ -4098,7 +4128,7 @@ async def init_array(
40984128
filters: FiltersLike = "auto",
40994129
compressors: CompressorsLike = "auto",
41004130
serializer: SerializerLike = "auto",
4101-
fill_value: Any | None = None,
4131+
fill_value: Any | None = DEFAULT_FILL_VALUE,
41024132
order: MemoryOrder | None = None,
41034133
zarr_format: ZarrFormat | None = 3,
41044134
attributes: dict[str, JSON] | None = None,
@@ -4319,7 +4349,7 @@ async def create_array(
43194349
filters: FiltersLike = "auto",
43204350
compressors: CompressorsLike = "auto",
43214351
serializer: SerializerLike = "auto",
4322-
fill_value: Any | None = None,
4352+
fill_value: Any | None = DEFAULT_FILL_VALUE,
43234353
order: MemoryOrder | None = None,
43244354
zarr_format: ZarrFormat | None = 3,
43254355
attributes: dict[str, JSON] | None = None,

tests/test_array.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,28 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None:
10121012
else:
10131013
assert a.fill_value == dtype.default_scalar()
10141014

1015+
@staticmethod
1016+
# @pytest.mark.parametrize("zarr_format", [2, 3])
1017+
@pytest.mark.parametrize("dtype", zdtype_examples)
1018+
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
1019+
def test_default_fill_value_None(
1020+
dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat
1021+
) -> None:
1022+
"""
1023+
Test that the fill value of an array is set to the default value for an explicit None argument for
1024+
Zarr Format 3, and to null for Zarr Format 2
1025+
"""
1026+
a = zarr.create_array(
1027+
store, shape=(5,), chunks=(5,), dtype=dtype, fill_value=None, zarr_format=zarr_format
1028+
)
1029+
if zarr_format == 3:
1030+
if isinstance(dtype, DateTime64 | TimeDelta64) and np.isnat(a.fill_value):
1031+
assert np.isnat(dtype.default_scalar())
1032+
else:
1033+
assert a.fill_value == dtype.default_scalar()
1034+
elif zarr_format == 2:
1035+
assert a.fill_value is None
1036+
10151037
@staticmethod
10161038
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
10171039
@pytest.mark.parametrize("dtype", zdtype_examples)

0 commit comments

Comments
 (0)