Skip to content

Commit 9da38f7

Browse files
TomAugspurger and d-v-b authored
Add NDBuffer.empty (#3191)
In #2904, I've found that `np.empty(...)` can be quite a bit faster than the `np.full` we use in places (https://github.com/zarr-developers/zarr-python/blob/baabf08d07e8518e3d37bd83c493a1d46ea7ac1d/src/zarr/core/buffer/cpu.py#L149). Note, though, that `np.empty` and `np.zeros` are about the same for the cases where we use `np.zeros`. For the common case of chunk-aligned reads, the memset used by `np.full` or `np.zeros` should be unnecessary, because the codec pipeline will overwrite the memory anyway (or the memory will be overwritten with `fill_value` if the store is missing the key). This adds a new method `NDBuffer.empty`, mirroring `np.empty`. To preserve backwards compatibility, it's *not* abstract. It delegates to the less efficient `NDBuffer.create`. But I've implemented it for our `gpu` and `cpu` buffers. Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>
1 parent a7080a3 commit 9da38f7

File tree

7 files changed

+104
-12
lines changed

7 files changed

+104
-12
lines changed

changes/3191.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added `NDBuffer.empty` method for faster ndbuffer initialization.

src/zarr/core/buffer/core.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,41 @@ def create(
374374
cast("NDArrayLike", None)
375375
) # This line will never be reached, but it satisfies the type checker
376376

377+
@classmethod
def empty(
    cls, shape: ChunkCoords, dtype: npt.DTypeLike, order: Literal["C", "F"] = "C"
) -> Self:
    """
    Allocate a buffer with the given shape, dtype, and memory order whose
    contents are left empty.

    Because the memory behind the underlying ndarray-like object does not
    have to be initialized, this method can be faster than
    ``NDBuffer.create``.

    Parameters
    ----------
    shape
        Shape of the buffer and of the ndarray-like object it wraps.
    dtype
        Data type of the buffer and of the ndarray-like object it wraps.
    order
        Memory layout for multi-dimensional data: ``"C"`` for row-major
        (C-style) or ``"F"`` for column-major (Fortran-style).

    Returns
    -------
    buffer
        A new buffer wrapping a new ndarray-like object with empty data.

    See Also
    --------
    NDBuffer.create
        Create a new buffer with some initial fill value.
    """
    # Generic fallback: route the request through ``create``. Subclasses that
    # know a cheaper way to allocate an empty buffer should override this.
    fallback = cls.create(shape=shape, dtype=dtype, order=order)
    return fallback
411+
377412
@classmethod
378413
def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self:
379414
"""Create a new buffer of a ndarray-like object

src/zarr/core/buffer/cpu.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from typing import Self
2121

2222
from zarr.core.buffer.core import ArrayLike, NDArrayLike
23-
from zarr.core.common import BytesLike
23+
from zarr.core.common import BytesLike, ChunkCoords
2424

2525

2626
class Buffer(core.Buffer):
@@ -160,6 +160,12 @@ def create(
160160
else:
161161
return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order))
162162

163+
@classmethod
def empty(
    cls, shape: ChunkCoords, dtype: npt.DTypeLike, order: Literal["C", "F"] = "C"
) -> Self:
    """
    Create a buffer backed by a NumPy array allocated with ``np.empty``.

    Parameters
    ----------
    shape
        Shape passed to ``np.empty``.
    dtype
        Data type passed to ``np.empty``.
    order
        Memory order passed to ``np.empty``: ``"C"`` or ``"F"``.

    Returns
    -------
    buffer
        A new instance of ``cls`` wrapping the allocated array.
    """
    # ``np.empty`` does not initialize the array's entries, so callers must
    # write to the buffer before reading meaningful values from it.
    uninitialized = np.empty(shape=shape, dtype=dtype, order=order)
    return cls(uninitialized)
168+
163169
@classmethod
164170
def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:
165171
return cls.from_ndarray_like(np.asanyarray(array_like))

src/zarr/core/buffer/gpu.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from collections.abc import Iterable
2323
from typing import Self
2424

25-
from zarr.core.common import BytesLike
25+
from zarr.core.common import BytesLike, ChunkCoords
2626

2727
try:
2828
import cupy as cp
@@ -178,6 +178,12 @@ def create(
178178
ret.fill(fill_value)
179179
return ret
180180

181+
@classmethod
def empty(
    cls, shape: ChunkCoords, dtype: npt.DTypeLike, order: Literal["C", "F"] = "C"
) -> Self:
    """
    Create a buffer backed by a CuPy array allocated with ``cp.empty``.

    Parameters
    ----------
    shape
        Shape passed to ``cp.empty``.
    dtype
        Data type passed to ``cp.empty``.
    order
        Memory order passed to ``cp.empty``: ``"C"`` or ``"F"``.

    Returns
    -------
    buffer
        A new instance of ``cls`` wrapping the allocated array.
    """
    # ``cp.empty`` mirrors ``np.empty``: the array is allocated but its
    # entries are not initialized.
    uninitialized = cp.empty(shape=shape, dtype=dtype, order=order)
    return cls(uninitialized)
186+
181187
@classmethod
182188
def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:
183189
"""Create a new buffer of Numpy array-like object

src/zarr/testing/buffer.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from collections.abc import Iterable
1414
from typing import Self
1515

16+
from zarr.core.common import ChunkCoords
17+
1618

1719
__all__ = [
1820
"NDBufferUsingTestNDArrayLike",
@@ -51,6 +53,15 @@ def create(
5153
ret.fill(fill_value)
5254
return ret
5355

56+
@classmethod
def empty(
    cls, shape: ChunkCoords, dtype: npt.DTypeLike, order: Literal["C", "F"] = "C"
) -> Self:
    """
    Create an empty buffer while bypassing ``cpu.NDBuffer.empty``.

    Parameters
    ----------
    shape
        Shape of the buffer to create.
    dtype
        Data type of the buffer to create.
    order
        Memory order of the buffer to create: ``"C"`` or ``"F"``.

    Returns
    -------
    buffer
        Whatever the next ``empty`` implementation after ``cpu.NDBuffer``
        in the method resolution order returns for ``cls``.
    """
    # The two-argument ``super`` starts method lookup *after* ``cpu.NDBuffer``,
    # so the CPU-specific ``empty`` is skipped.
    # NOTE(review): presumably this reaches the generic base implementation,
    # which delegates to this class's ``create`` -- confirm against the class
    # hierarchy.
    parent_empty = super(cpu.NDBuffer, cls).empty
    return parent_empty(shape=shape, dtype=dtype, order=order)
64+
5465

5566
class StoreExpectingTestBuffer(MemoryStore):
5667
"""Example of a custom Store that expect MyBuffer for all its non-metadata

src/zarr/testing/utils.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,10 @@ def has_cupy() -> bool:
4040
T = TypeVar("T")
4141

4242

43+
gpu_mark = pytest.mark.gpu
44+
skip_if_no_gpu = pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")
45+
46+
4347
# Decorator for GPU tests
4448
def gpu_test(func: T) -> T:
45-
return cast(
46-
"T",
47-
pytest.mark.gpu(
48-
pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(
49-
func
50-
)
51-
),
52-
)
49+
return cast("T", gpu_mark(skip_if_no_gpu(func)))

tests/test_buffer.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING
3+
from typing import TYPE_CHECKING, Literal
44

55
import numpy as np
66
import pytest
@@ -20,7 +20,7 @@
2020
TestBuffer,
2121
TestNDArrayLike,
2222
)
23-
from zarr.testing.utils import gpu_test
23+
from zarr.testing.utils import gpu_mark, gpu_test, skip_if_no_gpu
2424

2525
if TYPE_CHECKING:
2626
import types
@@ -200,3 +200,39 @@ def test_gpu_buffer_prototype() -> None:
200200
def test_cpu_buffer_as_scalar() -> None:
201201
buf = cpu.buffer_prototype.nd_buffer.create(shape=(), dtype="int64")
202202
assert buf.as_scalar() == buf.as_ndarray_like()[()] # type: ignore[index]
203+
204+
205+
@pytest.mark.parametrize(
    "prototype",
    [
        cpu.buffer_prototype,
        pytest.param(
            gpu.buffer_prototype,
            marks=[gpu_mark, skip_if_no_gpu],
        ),
        BufferPrototype(
            buffer=cpu.Buffer,
            nd_buffer=NDBufferUsingTestNDArrayLike,
        ),
    ],
)
@pytest.mark.parametrize(
    "shape",
    [
        (1, 2),
        (1, 2, 3),
    ],
)
@pytest.mark.parametrize("dtype", ["int32", "float64"])
@pytest.mark.parametrize("order", ["C", "F"])
def test_empty(
    prototype: BufferPrototype, shape: tuple[int, ...], dtype: str, order: Literal["C", "F"]
) -> None:
    """
    Check that ``nd_buffer.empty`` honours the requested shape, dtype, and
    memory order for each buffer prototype under test.

    The contents of the buffer are deliberately not inspected; only the
    array metadata is asserted.
    """
    array = prototype.nd_buffer.empty(shape=shape, dtype=dtype, order=order).as_ndarray_like()
    assert array.shape == shape
    assert array.dtype == dtype
    # Pick the contiguity flag that corresponds to the requested layout.
    contiguity_flag = "c_contiguous" if order == "C" else "f_contiguous"
    assert getattr(array.flags, contiguity_flag)  # type: ignore[attr-defined]

0 commit comments

Comments
 (0)