Skip to content

Commit d6d2450

Browse files
benbovy and Illviljan authored
Some alignment optimizations (#7382)
* compare indexes: return early if all same objects

  This may happen in some (rare?) cases where the objects to align share the same indexes.

* avoid re-indexing when not needed

  If all unindexed dimension sizes match the indexed dimension sizes in the objects to align, we don't need re-indexing.

* add benchmark

* update what's new

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
1 parent fc1dbb5 commit d6d2450

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

asv_bench/benchmarks/indexing.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,18 @@ def time_indexing(self):
147147

148148
def cleanup(self):
149149
self.ds.close()
150+
151+
152+
class AssignmentOptimized:
153+
# https://github.com/pydata/xarray/pull/7382
154+
def setup(self):
155+
self.ds = xr.Dataset(coords={"x": np.arange(500_000)})
156+
self.da = xr.DataArray(np.arange(500_000), dims="x")
157+
158+
def time_assign_no_reindex(self):
159+
# assign with non-indexed DataArray of same dimension size
160+
self.ds.assign(foo=self.da)
161+
162+
def time_assign_identical_indexes(self):
163+
# fastpath index comparison (same index object)
164+
self.ds.assign(foo=self.ds.x)

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ Bug fixes
4545
- add a ``keep_attrs`` parameter to :py:meth:`Dataset.pad`, :py:meth:`DataArray.pad`,
4646
and :py:meth:`Variable.pad` (:pull:`7267`).
4747
By `Justus Magin <https://github.com/keewis>`_.
48+
- Fixed performance regression in alignment between indexed and non-indexed objects
49+
of the same shape (:pull:`7382`).
50+
By `Benoît Bovy <https://github.com/benbovy>`_.
4851
- Preserve original dtype on accessing MultiIndex levels (:issue:`7250`,
4952
:pull:`7393`). By `Ian Carroll <https://github.com/itcarroll>`_.
5053

xarray/core/alignment.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,8 +343,33 @@ def _need_reindex(self, dims, cmp_indexes) -> bool:
343343
pandas). This is useful, e.g., for overwriting such duplicate indexes.
344344
345345
"""
346-
has_unindexed_dims = any(dim in self.unindexed_dim_sizes for dim in dims)
347-
return not (indexes_all_equal(cmp_indexes)) or has_unindexed_dims
346+
if not indexes_all_equal(cmp_indexes):
347+
# always reindex when matching indexes are not equal
348+
return True
349+
350+
unindexed_dims_sizes = {}
351+
for dim in dims:
352+
if dim in self.unindexed_dim_sizes:
353+
sizes = self.unindexed_dim_sizes[dim]
354+
if len(sizes) > 1:
355+
# reindex if different sizes are found for unindexed dims
356+
return True
357+
else:
358+
unindexed_dims_sizes[dim] = next(iter(sizes))
359+
360+
if unindexed_dims_sizes:
361+
indexed_dims_sizes = {}
362+
for cmp in cmp_indexes:
363+
index_vars = cmp[1]
364+
for var in index_vars.values():
365+
indexed_dims_sizes.update(var.sizes)
366+
367+
for dim, size in unindexed_dims_sizes.items():
368+
if indexed_dims_sizes.get(dim, -1) != size:
369+
# reindex if unindexed dimension size doesn't match
370+
return True
371+
372+
return False
348373

349374
def _get_index_joiner(self, index_cls) -> Callable:
350375
if self.join in ["outer", "inner"]:

xarray/core/indexes.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1419,6 +1419,11 @@ def check_variables():
14191419
)
14201420

14211421
indexes = [e[0] for e in elements]
1422+
1423+
same_objects = all(indexes[0] is other_idx for other_idx in indexes[1:])
1424+
if same_objects:
1425+
return True
1426+
14221427
same_type = all(type(indexes[0]) is type(other_idx) for other_idx in indexes[1:])
14231428
if same_type:
14241429
try:

0 commit comments

Comments
 (0)