Skip to content

Commit a9f9d1e

Browse files
dcherian and Illviljan authored
Fix min_count behaviour with flox. (#7809)
* Fix `min_count` behaviour with flox. Closes #7808
* Apply suggestions from code review

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>

---------

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
1 parent 3063100 commit a9f9d1e

File tree

3 files changed

+45
-0
lines changed

3 files changed

+45
-0
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ Bug fixes
4242
~~~~~~~~~
4343
- Fix groupby binary ops when grouped array is subset relative to other. (:issue:`7797`).
4444
By `Deepak Cherian <https://github.com/dcherian>`_.
45+
- Fix groupby sum, prod for all-NaN groups with ``flox``. (:issue:`7808`).
46+
By `Deepak Cherian <https://github.com/dcherian>`_.
4547

4648
Documentation
4749
~~~~~~~~~~~~~

xarray/core/groupby.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,13 @@ def _flox_reduce(
964964
else:
965965
non_numeric = {}
966966

967+
if "min_count" in kwargs:
968+
if kwargs["func"] not in ["sum", "prod"]:
969+
raise TypeError("Received an unexpected keyword argument 'min_count'")
970+
elif kwargs["min_count"] is None:
971+
# set explicitly to avoid unnecessarily accumulating count
972+
kwargs["min_count"] = 0
973+
967974
# weird backcompat
968975
# reducing along a unique indexed dimension with squeeze=True
969976
# should raise an error

xarray/tests/test_groupby.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import datetime
4+
import operator
45
import warnings
56

67
import numpy as np
@@ -17,6 +18,7 @@
1718
assert_identical,
1819
create_test_data,
1920
has_cftime,
21+
has_flox,
2022
has_pandas_version_two,
2123
requires_dask,
2224
requires_flox,
@@ -2336,3 +2338,37 @@ def test_groupby_binary_op_regression() -> None:
23362338
anom_gb = x_slice.groupby("time.month") - clim
23372339

23382340
assert_identical(xr.zeros_like(anom_gb), anom_gb)
2341+
2342+
2343+
@requires_flox
2344+
@pytest.mark.parametrize("func", ["sum", "prod"])
2345+
@pytest.mark.parametrize("skipna", [True, False])
2346+
@pytest.mark.parametrize("min_count", [None, 1])
2347+
def test_min_count_vs_flox(func: str, min_count: int | None, skipna: bool) -> None:
2348+
da = DataArray(
2349+
data=np.array([np.nan, 1, 1, np.nan, 1, 1]),
2350+
dims="x",
2351+
coords={"labels": ("x", np.array([1, 2, 3, 1, 2, 3]))},
2352+
)
2353+
2354+
gb = da.groupby("labels")
2355+
method = operator.methodcaller(func, min_count=min_count, skipna=skipna)
2356+
with xr.set_options(use_flox=True):
2357+
actual = method(gb)
2358+
with xr.set_options(use_flox=False):
2359+
expected = method(gb)
2360+
assert_identical(actual, expected)
2361+
2362+
2363+
@pytest.mark.parametrize("use_flox", [True, False])
2364+
def test_min_count_error(use_flox: bool) -> None:
2365+
if use_flox and not has_flox:
2366+
pytest.skip()
2367+
da = DataArray(
2368+
data=np.array([np.nan, 1, 1, np.nan, 1, 1]),
2369+
dims="x",
2370+
coords={"labels": ("x", np.array([1, 2, 3, 1, 2, 3]))},
2371+
)
2372+
with xr.set_options(use_flox=use_flox):
2373+
with pytest.raises(TypeError):
2374+
da.groupby("labels").mean(min_count=1)

0 commit comments

Comments
 (0)