Skip to content

Commit 7133b42

Browse files
committed
flox: don't set fill_value where possible
Closes pydata#8090 Closes pydata#8206 Closes pydata#9398
1 parent 2783255 commit 7133b42

File tree

3 files changed

+58
-12
lines changed

3 files changed

+58
-12
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ Bug fixes
6060
- Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray``
6161
in NumPy 2.0 (:issue:`9312`, :pull:`9393`)
6262
By `Andrew Scherer <https://github.com/andrew-s28>`_.
63+
- Fix a few bugs affecting groupby reductions with `flox`. (:issue:`8090`, :issue:`9398`).
64+
By `Deepak Cherian <https://github.com/dcherian>`_.
6365

6466
Documentation
6567
~~~~~~~~~~~~~

xarray/core/groupby.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -849,10 +849,6 @@ def _flox_reduce(
849849
else obj._coords
850850
)
851851

852-
any_isbin = any(
853-
isinstance(grouper.grouper, BinGrouper) for grouper in self.groupers
854-
)
855-
856852
if keep_attrs is None:
857853
keep_attrs = _get_keep_attrs(default=True)
858854

@@ -926,14 +922,14 @@ def _flox_reduce(
926922
):
927923
raise ValueError(f"cannot reduce over dimensions {dim}.")
928924

929-
if kwargs["func"] not in ["all", "any", "count"]:
930-
kwargs.setdefault("fill_value", np.nan)
931-
if any_isbin and kwargs["func"] == "count":
932-
# This is an annoying hack. Xarray returns np.nan
933-
# when there are no observations in a bin, instead of 0.
934-
# We can fake that here by forcing min_count=1.
935-
# note min_count makes no sense in the xarray world
936-
# as a kwarg for count, so this should be OK
925+
has_missing_groups = (
926+
self.encoded.unique_coord.size != self.encoded.full_index.size
927+
)
928+
if has_missing_groups or kwargs.get("min_count", 0) > 0:
929+
# Xarray *always* returns np.nan when there are no observations in a group,
930+
# We can fake that here by forcing min_count=1 when it is not set.
931+
# This handles boolean reductions, and count
932+
# See GH8090, GH9398
937933
kwargs.setdefault("fill_value", np.nan)
938934
kwargs.setdefault("min_count", 1)
939935

xarray/tests/test_groupby.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2784,6 +2784,54 @@ def test_multiple_groupers_mixed(use_flox) -> None:
27842784
# ------
27852785

27862786

2787+
@pytest.mark.parametrize(
2788+
"reduction", ["max", "min", "nanmax", "nanmin", "sum", "nansum", "prod", "nanprod"]
2789+
)
2790+
def test_groupby_preserve_dtype(reduction):
2791+
# all groups are present, we should follow numpy exactly
2792+
ds = xr.Dataset(
2793+
{
2794+
"test": (
2795+
["x", "y"],
2796+
np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int16"),
2797+
)
2798+
},
2799+
coords={"idx": ("x", [1, 2, 1])},
2800+
)
2801+
2802+
kwargs = {}
2803+
if "nan" in reduction:
2804+
kwargs["skipna"] = True
2805+
reduction = reduction.removeprefix("nan")
2806+
actual = getattr(ds.groupby("idx"), reduction)(**kwargs).test.dtype
2807+
expected = getattr(np, reduction)(ds.test.data, axis=0).dtype
2808+
assert actual == expected
2809+
2810+
2811+
@requires_flox
2812+
@pytest.mark.parametrize("reduction", ["any", "all", "count"])
2813+
def test_gappy_resample_reductions(reduction):
2814+
# GH8090
2815+
dates = (("1988-12-01", "1990-11-30"), ("2000-12-01", "2001-11-30"))
2816+
times = [xr.date_range(*d, freq="D") for d in dates]
2817+
2818+
da = xr.concat(
2819+
[
2820+
xr.DataArray(np.random.rand(len(t)), coords={"time": t}, dims="time")
2821+
for t in times
2822+
],
2823+
dim="time",
2824+
).chunk(time=100)
2825+
2826+
rs = (da > 0.5).resample(time="YS-DEC")
2827+
method = getattr(rs, reduction)
2828+
with xr.set_options(use_flox=True):
2829+
actual = method(dim="time")
2830+
with xr.set_options(use_flox=False):
2831+
expected = method(dim="time")
2832+
assert_identical(expected, actual)
2833+
2834+
27872835
# Possible property tests
27882836
# 1. lambda x: x
27892837
# 2. grouped-reduce on unique coords is identical to array

0 commit comments

Comments
 (0)