Skip to content

Commit 78ee425

Browse files
committed
GroupBy: Finish eagerly_compute_group deprecation
1 parent 72ffff5 commit 78ee425

File tree

5 files changed

+54
-95
lines changed

5 files changed

+54
-95
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ Breaking changes
3232
Deprecations
3333
~~~~~~~~~~~~
3434

35+
- The deprecation cycle for the ``eagerly_compute_group`` kwarg to ``groupby`` and ``groupby_bins``
36+
is now complete. The only acceptable value now is ``False``; passing any other value raises a ``ValueError``.
37+
By `Deepak Cherian <https://github.com/dcherian>`_.
3538

3639
Bug fixes
3740
~~~~~~~~~

xarray/core/dataarray.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6813,7 +6813,7 @@ def groupby(
68136813
*,
68146814
squeeze: Literal[False] = False,
68156815
restore_coord_dims: bool = False,
6816-
eagerly_compute_group: bool = True,
6816+
eagerly_compute_group: Literal[False] = False,
68176817
**groupers: Grouper,
68186818
) -> DataArrayGroupBy:
68196819
"""Returns a DataArrayGroupBy object for performing grouped operations.
@@ -6830,10 +6830,7 @@ def groupby(
68306830
If True, also restore the dimension order of multi-dimensional
68316831
coordinates.
68326832
eagerly_compute_group: False
6833-
Whether to eagerly compute ``group`` when it is a chunked array.
6834-
This option is to maintain backwards compatibility. Set to False
6835-
to opt-in to future behaviour, where ``group`` is not automatically loaded
6836-
into memory.
6833+
This argument is deprecated.
68376834
**groupers : Mapping of str to Grouper or Resampler
68386835
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
68396836
One of ``group`` or ``groupers`` must be provided.
@@ -6965,7 +6962,7 @@ def groupby_bins(
69656962
squeeze: Literal[False] = False,
69666963
restore_coord_dims: bool = False,
69676964
duplicates: Literal["raise", "drop"] = "raise",
6968-
eagerly_compute_group: bool = True,
6965+
eagerly_compute_group: Literal[False] = False,
69696966
) -> DataArrayGroupBy:
69706967
"""Returns a DataArrayGroupBy object for performing grouped operations.
69716968
@@ -7003,10 +7000,7 @@ def groupby_bins(
70037000
duplicates : {"raise", "drop"}, default: "raise"
70047001
If bin edges are not unique, raise ValueError or drop non-uniques.
70057002
eagerly_compute_group: False
7006-
Whether to eagerly compute ``group`` when it is a chunked array.
7007-
This option is to maintain backwards compatibility. Set to False
7008-
to opt-in to future behaviour, where ``group`` is not automatically loaded
7009-
into memory.
7003+
This argument is deprecated.
70107004
70117005
Returns
70127006
-------

xarray/core/dataset.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9824,7 +9824,7 @@ def groupby(
98249824
*,
98259825
squeeze: Literal[False] = False,
98269826
restore_coord_dims: bool = False,
9827-
eagerly_compute_group: bool = True,
9827+
eagerly_compute_group: Literal[False] = False,
98289828
**groupers: Grouper,
98299829
) -> DatasetGroupBy:
98309830
"""Returns a DatasetGroupBy object for performing grouped operations.
@@ -9840,11 +9840,8 @@ def groupby(
98409840
restore_coord_dims : bool, default: False
98419841
If True, also restore the dimension order of multi-dimensional
98429842
coordinates.
9843-
eagerly_compute_group: bool
9844-
Whether to eagerly compute ``group`` when it is a chunked array.
9845-
This option is to maintain backwards compatibility. Set to False
9846-
to opt-in to future behaviour, where ``group`` is not automatically loaded
9847-
into memory.
9843+
eagerly_compute_group: False
9844+
This argument is deprecated.
98489845
**groupers : Mapping of str to Grouper or Resampler
98499846
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
98509847
One of ``group`` or ``groupers`` must be provided.
@@ -9945,7 +9942,7 @@ def groupby_bins(
99459942
squeeze: Literal[False] = False,
99469943
restore_coord_dims: bool = False,
99479944
duplicates: Literal["raise", "drop"] = "raise",
9948-
eagerly_compute_group: bool = True,
9945+
eagerly_compute_group: Literal[False] = False,
99499946
) -> DatasetGroupBy:
99509947
"""Returns a DatasetGroupBy object for performing grouped operations.
99519948
@@ -9982,11 +9979,8 @@ def groupby_bins(
99829979
coordinates.
99839980
duplicates : {"raise", "drop"}, default: "raise"
99849981
If bin edges are not unique, raise ValueError or drop non-uniques.
9985-
eagerly_compute_group: bool
9986-
Whether to eagerly compute ``group`` when it is a chunked array.
9987-
This option is to maintain backwards compatibility. Set to False
9988-
to opt-in to future behaviour, where ``group`` is not automatically loaded
9989-
into memory.
9982+
eagerly_compute_group: False
9983+
This argument is deprecated.
99909984
99919985
Returns
99929986
-------

xarray/core/groupby.py

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
FrozenMappingWarningOnValuesAccess,
4343
contains_only_chunked_or_numpy,
4444
either_dict_or_kwargs,
45-
emit_user_level_warning,
4645
hashable,
4746
is_scalar,
4847
maybe_wrap_array,
@@ -294,7 +293,7 @@ class ResolvedGrouper(Generic[T_DataWithCoords]):
294293
grouper: Grouper
295294
group: T_Group
296295
obj: T_DataWithCoords
297-
eagerly_compute_group: bool = field(repr=False)
296+
eagerly_compute_group: Literal[False] = field(repr=False)
298297

299298
# returned by factorize:
300299
encoded: EncodedGroups = field(init=False, repr=False)
@@ -323,39 +322,33 @@ def __post_init__(self) -> None:
323322

324323
self.group = _resolve_group(self.obj, self.group)
325324

325+
if self.eagerly_compute_group is not False:
326+
raise ValueError(
327+
f""""Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
328+
has been removed.
329+
Please load this array's data manually using `.compute` or `.load`.
330+
To intentionally avoid eager loading, either (1) specify
331+
`.groupby({self.group.name}=UniqueGrouper(labels=...), eagerly_load_group=False)`
332+
or (2) pass explicit bin edges using or `.groupby({self.group.name}=BinGrouper(bins=...),
333+
eagerly_load_group=False)`; as appropriate."""
334+
)
335+
326336
if not isinstance(self.group, _DummyGroup) and is_chunked_array(
327337
self.group.variable._data
328338
):
329-
if self.eagerly_compute_group is False:
330-
# This requires a pass to discover the groups present
331-
if (
332-
isinstance(self.grouper, UniqueGrouper)
333-
and self.grouper.labels is None
334-
):
335-
raise ValueError(
336-
"Please pass `labels` to UniqueGrouper when grouping by a chunked array."
337-
)
338-
# this requires a pass to compute the bin edges
339-
if isinstance(self.grouper, BinGrouper) and isinstance(
340-
self.grouper.bins, int
341-
):
342-
raise ValueError(
343-
"Please pass explicit bin edges to BinGrouper using the ``bins`` kwarg"
344-
"when grouping by a chunked array."
345-
)
346-
347-
if self.eagerly_compute_group:
348-
emit_user_level_warning(
349-
f""""Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
350-
is deprecated and will raise an error in v2025.05.0.
351-
Please load this array's data manually using `.compute` or `.load`.
352-
To intentionally avoid eager loading, either (1) specify
353-
`.groupby({self.group.name}=UniqueGrouper(labels=...), eagerly_load_group=False)`
354-
or (2) pass explicit bin edges using or `.groupby({self.group.name}=BinGrouper(bins=...),
355-
eagerly_load_group=False)`; as appropriate.""",
356-
DeprecationWarning,
339+
# This requires a pass to discover the groups present
340+
if isinstance(self.grouper, UniqueGrouper) and self.grouper.labels is None:
341+
raise ValueError(
342+
"Please pass `labels` to UniqueGrouper when grouping by a chunked array."
343+
)
344+
# this requires a pass to compute the bin edges
345+
if isinstance(self.grouper, BinGrouper) and isinstance(
346+
self.grouper.bins, int
347+
):
348+
raise ValueError(
349+
"Please pass explicit bin edges to BinGrouper using the ``bins`` kwarg"
350+
"when grouping by a chunked array."
357351
)
358-
self.group = self.group.compute()
359352

360353
self.encoded = self.grouper.factorize(self.group)
361354

xarray/tests/test_groupby.py

Lines changed: 17 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2914,33 +2914,21 @@ def test_multiple_groupers(use_flox: bool, shuffle: bool) -> None:
29142914

29152915
if has_dask:
29162916
b["xy"] = b["xy"].chunk()
2917-
for eagerly_compute_group in [True, False]:
2918-
kwargs = dict(
2919-
x=UniqueGrouper(),
2920-
xy=UniqueGrouper(labels=["a", "b", "c"]),
2921-
eagerly_compute_group=eagerly_compute_group,
2922-
)
2923-
expected = xr.DataArray(
2924-
[[[1, 1, 1], [np.nan, 1, 2]]] * 4,
2925-
dims=("z", "x", "xy"),
2926-
coords={"xy": ("xy", ["a", "b", "c"], {"foo": "bar"})},
2927-
)
2928-
if eagerly_compute_group:
2929-
with raise_if_dask_computes(max_computes=1):
2930-
with pytest.warns(DeprecationWarning):
2931-
gb = b.groupby(**kwargs) # type: ignore[arg-type]
2932-
assert_identical(gb.count(), expected)
2933-
else:
2934-
with raise_if_dask_computes(max_computes=0):
2935-
gb = b.groupby(**kwargs) # type: ignore[arg-type]
2936-
assert is_chunked_array(gb.encoded.codes.data)
2937-
assert not gb.encoded.group_indices
2938-
if has_flox:
2939-
with raise_if_dask_computes(max_computes=1):
2940-
assert_identical(gb.count(), expected)
2941-
else:
2942-
with pytest.raises(ValueError, match="when lazily grouping"):
2943-
gb.count()
2917+
expected = xr.DataArray(
2918+
[[[1, 1, 1], [np.nan, 1, 2]]] * 4,
2919+
dims=("z", "x", "xy"),
2920+
coords={"xy": ("xy", ["a", "b", "c"], {"foo": "bar"})},
2921+
)
2922+
with raise_if_dask_computes(max_computes=0):
2923+
gb = b.groupby(x=UniqueGrouper(), xy=UniqueGrouper(labels=["a", "b", "c"])) # type: ignore[arg-type]
2924+
assert is_chunked_array(gb.encoded.codes.data)
2925+
assert not gb.encoded.group_indices
2926+
if has_flox:
2927+
with raise_if_dask_computes(max_computes=1):
2928+
assert_identical(gb.count(), expected)
2929+
else:
2930+
with pytest.raises(ValueError, match="when lazily grouping"):
2931+
gb.count()
29442932

29452933

29462934
@pytest.mark.parametrize("use_flox", [True, False])
@@ -3264,31 +3252,18 @@ def test_groupby_dask_eager_load_warnings() -> None:
32643252
coords={"x": ("z", np.arange(12)), "y": ("z", np.arange(12))},
32653253
).chunk(z=6)
32663254

3267-
with pytest.warns(DeprecationWarning):
3268-
ds.groupby(x=UniqueGrouper())
3269-
3270-
with pytest.warns(DeprecationWarning):
3271-
ds.groupby("x")
3272-
3273-
with pytest.warns(DeprecationWarning):
3274-
ds.groupby(ds.x)
3275-
32763255
with pytest.raises(ValueError, match="Please pass"):
32773256
ds.groupby("x", eagerly_compute_group=False)
32783257

32793258
# This is technically fine but anyone iterating over the groupby object
32803259
# will see an error. No warning is emitted now that the deprecation cycle is complete.
3281-
with pytest.warns(DeprecationWarning):
3282-
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]))
3260+
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]))
32833261

32843262
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]), eagerly_compute_group=False)
32853263

3286-
with pytest.warns(DeprecationWarning):
3287-
ds.groupby_bins("x", bins=3)
32883264
with pytest.raises(ValueError, match="Please pass"):
32893265
ds.groupby_bins("x", bins=3, eagerly_compute_group=False)
3290-
with pytest.warns(DeprecationWarning):
3291-
ds.groupby_bins("x", bins=[1, 2, 3])
3266+
ds.groupby_bins("x", bins=[1, 2, 3])
32923267
ds.groupby_bins("x", bins=[1, 2, 3], eagerly_compute_group=False)
32933268

32943269

0 commit comments

Comments
 (0)