Skip to content

Commit 5b3f127

Browse files
kmuehlbauerheadtr1ckpre-commit-ci[bot]dcherian
authored
Preserve order of variables in combine_by_coords (#9070)
* FIX: do not sort datasets in combine_by_coords * add test * add whats-new.rst entry * use groupby_defaultdict * Apply suggestions from code review Co-authored-by: Michael Niklas <mick.niklas@gmail.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix typing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update xarray/core/combine.py * fix typing, replace other occurrence * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix groupby * fix groupby --------- Co-authored-by: Michael Niklas <mick.niklas@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent 97a4a71 commit 5b3f127

File tree

3 files changed

+35
-9
lines changed

3 files changed

+35
-9
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ Bug fixes
106106
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
107107
- Fix weighted ``polyfit`` for arrays with more than two dimensions (:issue:`9972`, :pull:`9974`).
108108
By `Mattia Almansi <https://github.com/malmans2>`_.
109+
- Preserve order of variables in :py:func:`xarray.combine_by_coords` (:issue:`8828`, :pull:`9070`).
110+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
109111
- Cast ``numpy`` scalars to arrays in :py:meth:`NamedArray.from_arrays` (:issue:`10005`, :pull:`10008`)
110112
By `Justus Magin <https://github.com/keewis>`_.
111113

xarray/core/combine.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from __future__ import annotations
22

3-
import itertools
4-
from collections import Counter
5-
from collections.abc import Iterable, Iterator, Sequence
3+
from collections import Counter, defaultdict
4+
from collections.abc import Callable, Hashable, Iterable, Iterator, Sequence
65
from typing import TYPE_CHECKING, Literal, TypeVar, Union, cast
76

87
import pandas as pd
@@ -269,10 +268,7 @@ def _combine_all_along_first_dim(
269268
combine_attrs: CombineAttrsOptions = "drop",
270269
):
271270
# Group into lines of datasets which must be combined along dim
272-
# need to sort by _new_tile_id first for groupby to work
273-
# TODO: is the sorted need?
274-
combined_ids = dict(sorted(combined_ids.items(), key=_new_tile_id))
275-
grouped = itertools.groupby(combined_ids.items(), key=_new_tile_id)
271+
grouped = groupby_defaultdict(list(combined_ids.items()), key=_new_tile_id)
276272

277273
# Combine all of these datasets along dim
278274
new_combined_ids = {}
@@ -606,6 +602,21 @@ def vars_as_keys(ds):
606602
return tuple(sorted(ds))
607603

608604

605+
K = TypeVar("K", bound=Hashable)
606+
607+
608+
def groupby_defaultdict(
    iter: Iterable[T],
    key: Callable[[T], K],
) -> Iterator[tuple[K, Iterator[T]]]:
    """Group items by ``key`` without requiring the input to be sorted.

    Order-preserving alternative to :py:func:`itertools.groupby`. Unlike
    ``itertools.groupby``, items that share a key end up in the same group
    even when they are not adjacent in the input, so no pre-sorting is
    needed.

    Parameters
    ----------
    iter : iterable
        Items to group. Any iterable is accepted (it is consumed exactly
        once), not only indexable sequences.
    key : callable
        Function computing the (hashable) grouping key of a single item.

    Yields
    ------
    tuple of (key, iterator)
        One pair per distinct key, in the order in which each key first
        appears in the input. The iterator yields the items of that group
        in their original relative order.
    """
    # Collect the items themselves (rather than their positions) so that the
    # input does not have to support indexing.
    groups: defaultdict[K, list[T]] = defaultdict(list)
    for obj in iter:
        groups[key(obj)].append(obj)

    # dicts preserve insertion order, hence groups come out in
    # first-occurrence order of their key.
    for k, members in groups.items():
        # The builtin ``iter`` is shadowed by the parameter name, so a
        # generator expression is used to hand out an iterator per group.
        yield k, (member for member in members)
618+
619+
609620
def _combine_single_variable_hypercube(
610621
datasets,
611622
fill_value=dtypes.NA,
@@ -965,8 +976,7 @@ def combine_by_coords(
965976
]
966977

967978
# Group by data vars
968-
sorted_datasets = sorted(data_objects, key=vars_as_keys)
969-
grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
979+
grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys)
970980

971981
# Perform the multidimensional combine on each group of data variables
972982
# before merging back together

xarray/tests/test_combine.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,20 @@ def test_combine_by_coords_incomplete_hypercube(self):
10431043
with pytest.raises(ValueError):
10441044
combine_by_coords([x1, x2, x3], fill_value=None)
10451045

1046+
def test_combine_by_coords_override_order(self) -> None:
    # Regression test for https://github.com/pydata/xarray/issues/8828:
    # with compat="override", the value kept for a variable present in
    # several inputs must follow the order in which the inputs are passed.
    dims = ("y", "x")
    x1 = Dataset({"a": (dims, [[1]])}, coords={"y": [0], "x": [0]})
    x2 = Dataset(
        {"a": (dims, [[2]]), "b": (dims, [[1]])},
        coords={"y": [0], "x": [0]},
    )

    # x1 passed first: "a" is taken from x1 (and thus matches x2's "b").
    actual = combine_by_coords([x1, x2], compat="override")
    assert_equal(actual["a"], actual["b"])
    assert_equal(actual["a"], x1["a"])

    # x2 passed first: "a" is taken from x2 instead.
    actual = combine_by_coords([x2, x1], compat="override")
    assert_equal(actual["a"], x2["a"])
1059+
10461060

10471061
class TestCombineMixedObjectsbyCoords:
10481062
def test_combine_by_coords_mixed_unnamed_dataarrays(self):

0 commit comments

Comments
 (0)