Skip to content

Commit fad1185

Browse files
jder and dcherian authored
Avoid copying vectorized indexes (#10316)
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent 54ef8d2 commit fad1185

File tree

3 files changed

+48
-14
lines changed

3 files changed

+48
-14
lines changed

asv_bench/benchmarks/indexing.py

Lines changed: 38 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -39,18 +39,30 @@
3939
"2d-1scalar": xr.DataArray(randn(100, frac_nan=0.1), dims=["x"]),
4040
}
4141

42-
vectorized_indexes = {
43-
"1-1d": {"x": xr.DataArray(randint(0, nx, 400), dims="a")},
44-
"2-1d": {
45-
"x": xr.DataArray(randint(0, nx, 400), dims="a"),
46-
"y": xr.DataArray(randint(0, ny, 400), dims="a"),
47-
},
48-
"3-2d": {
49-
"x": xr.DataArray(randint(0, nx, 400).reshape(4, 100), dims=["a", "b"]),
50-
"y": xr.DataArray(randint(0, ny, 400).reshape(4, 100), dims=["a", "b"]),
51-
"t": xr.DataArray(randint(0, nt, 400).reshape(4, 100), dims=["a", "b"]),
52-
},
53-
}
42+
43+
def make_vectorized_indexes(n_index):
44+
return {
45+
"1-1d": {"x": xr.DataArray(randint(0, nx, n_index), dims="a")},
46+
"2-1d": {
47+
"x": xr.DataArray(randint(0, nx, n_index), dims="a"),
48+
"y": xr.DataArray(randint(0, ny, n_index), dims="a"),
49+
},
50+
"3-2d": {
51+
"x": xr.DataArray(
52+
randint(0, nx, n_index).reshape(n_index // 100, 100), dims=["a", "b"]
53+
),
54+
"y": xr.DataArray(
55+
randint(0, ny, n_index).reshape(n_index // 100, 100), dims=["a", "b"]
56+
),
57+
"t": xr.DataArray(
58+
randint(0, nt, n_index).reshape(n_index // 100, 100), dims=["a", "b"]
59+
),
60+
},
61+
}
62+
63+
64+
vectorized_indexes = make_vectorized_indexes(400)
65+
big_vectorized_indexes = make_vectorized_indexes(400_000)
5466

5567
vectorized_assignment_values = {
5668
"1-1d": xr.DataArray(randn((400, ny)), dims=["a", "y"], coords={"a": randn(400)}),
@@ -101,6 +113,20 @@ def time_indexing_basic_ds_large(self, key):
101113
self.ds_large.isel(**basic_indexes[key]).load()
102114

103115

116+
class IndexingOnly(Base):
117+
@parameterized(["key"], [list(basic_indexes.keys())])
118+
def time_indexing_basic(self, key):
119+
self.ds.isel(**basic_indexes[key])
120+
121+
@parameterized(["key"], [list(outer_indexes.keys())])
122+
def time_indexing_outer(self, key):
123+
self.ds.isel(**outer_indexes[key])
124+
125+
@parameterized(["key"], [list(big_vectorized_indexes.keys())])
126+
def time_indexing_big_vectorized(self, key):
127+
self.ds.isel(**big_vectorized_indexes[key])
128+
129+
104130
class Assignment(Base):
105131
@parameterized(["key"], [list(basic_indexes.keys())])
106132
def time_assignment_basic(self, key):

doc/whats-new.rst

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,14 @@ Bug fixes
5959
and prevents round-tripping them as :py:class:`numpy.datetime64` values
6060
(:pull:`10352`). By `Spencer Clark <https://github.com/spencerkclark>`_.
6161

62+
Performance
63+
~~~~~~~~~~~
64+
- Lazily indexed arrays now use less memory to store keys by avoiding copies
65+
in :py:class:`~xarray.core.indexing.VectorizedIndexer` and :py:class:`~xarray.core.indexing.OuterIndexer`
66+
(:pull:`10316`).
67+
By `Jesse Rusak <https://github.com/jder>`_.
68+
69+
6270
Documentation
6371
~~~~~~~~~~~~~
6472

xarray/core/indexing.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -444,7 +444,7 @@ def __init__(
444444
f"invalid indexer array for {type(self).__name__}; must be scalar "
445445
f"or have 1 dimension: {k!r}"
446446
)
447-
k = k.astype(np.int64) # type: ignore[union-attr]
447+
k = duck_array_ops.astype(k, np.int64, copy=False)
448448
else:
449449
raise TypeError(
450450
f"unexpected indexer type for {type(self).__name__}: {k!r}"
@@ -488,7 +488,7 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...
488488
"invalid indexer key: ndarray arguments "
489489
f"have different numbers of dimensions: {ndims}"
490490
)
491-
k = k.astype(np.int64) # type: ignore[union-attr]
491+
k = duck_array_ops.astype(k, np.int64, copy=False)
492492
else:
493493
raise TypeError(
494494
f"unexpected indexer type for {type(self).__name__}: {k!r}"

0 commit comments

Comments
 (0)