Skip to content

Commit ec88c28

Browse files
authored
Fix GroupBy first, last with flox (#10173)
* Fix GroupBy first, last with flox
  Closes #10169
* fix test
* parallelize upstream tests
1 parent fe40be3 commit ec88c28

File tree

5 files changed

+39
-15
lines changed

5 files changed

+39
-15
lines changed

.github/workflows/upstream-dev-ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ jobs:
8484
if: success()
8585
id: status
8686
run: |
87-
python -m pytest --timeout=60 -rf \
87+
python -m pytest --timeout=60 -rf -nauto \
8888
--report-log output-${{ matrix.python-version }}-log.jsonl
8989
- name: Generate and publish the report
9090
if: |

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,9 @@ Bug fixes
4141
This fixes the issue where using the ``zarr_version`` parameter would raise a deprecation warning telling the user to use
4242
a non-existent ``zarr_format`` parameter instead. (:issue:`10163`, :pull:`10164`)
4343
By `Karl Krauth <https://github.com/Karl-Krauth>`_.
44+
- Fix grouped and resampled ``first``, ``last`` with datetimes (:issue:`10169`, :pull:`10173`)
45+
By `Deepak Cherian <https://github.com/dcherian>`_.
46+
4447

4548
Documentation
4649
~~~~~~~~~~~~~

xarray/core/groupby.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1401,8 +1401,14 @@ def _first_or_last(
14011401
and OPTIONS["use_flox"]
14021402
and contains_only_chunked_or_numpy(self._obj)
14031403
):
1404+
import flox.xrdtypes
1405+
14041406
result = self._flox_reduce(
1405-
dim=None, func=op, skipna=skipna, keep_attrs=keep_attrs
1407+
dim=None,
1408+
func=op,
1409+
skipna=skipna,
1410+
keep_attrs=keep_attrs,
1411+
fill_value=flox.xrdtypes.NA,
14061412
)
14071413
else:
14081414
result = self.reduce(

xarray/tests/conftest.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
import xarray as xr
88
from xarray import DataArray, Dataset, DataTree
9-
from xarray.tests import create_test_data, requires_dask
9+
from xarray.tests import create_test_data, has_cftime, requires_dask
1010

1111

1212
@pytest.fixture(params=["numpy", pytest.param("dask", marks=requires_dask)])
@@ -97,6 +97,18 @@ def da(request, backend):
9797
raise ValueError
9898

9999

100+
@pytest.fixture(
101+
params=[
102+
False,
103+
pytest.param(
104+
True, marks=pytest.mark.skipif(not has_cftime, reason="no cftime")
105+
),
106+
]
107+
)
108+
def use_cftime(request):
109+
return request.param
110+
111+
100112
@pytest.fixture(params=[Dataset, DataArray])
101113
def type(request):
102114
return request.param

xarray/tests/test_groupby.py

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1845,7 +1845,6 @@ def test_groupby_fastpath_for_monotonic(self, use_flox: bool) -> None:
18451845

18461846
class TestDataArrayResample:
18471847
@pytest.mark.parametrize("shuffle", [True, False])
1848-
@pytest.mark.parametrize("use_cftime", [True, False])
18491848
@pytest.mark.parametrize(
18501849
"resample_freq",
18511850
[
@@ -1906,12 +1905,8 @@ def resample_as_pandas(array, *args, **kwargs):
19061905
with pytest.raises(ValueError):
19071906
reverse.resample(time=resample_freq).mean()
19081907

1909-
@pytest.mark.parametrize("use_cftime", [True, False])
19101908
def test_resample_doctest(self, use_cftime: bool) -> None:
19111909
# run the doctest example here so we are not surprised
1912-
if use_cftime and not has_cftime:
1913-
pytest.skip()
1914-
19151910
da = xr.DataArray(
19161911
np.array([1, 2, 3, 1, 2, np.nan]),
19171912
dims="time",
@@ -1947,8 +1942,10 @@ def func(arg1, arg2, arg3=0.0):
19471942
actual = da.resample(time="D").map(func, args=(1.0,), arg3=1.0)
19481943
assert_identical(actual, expected)
19491944

1950-
def test_resample_first(self) -> None:
1951-
times = pd.date_range("2000-01-01", freq="6h", periods=10)
1945+
def test_resample_first_last(self, use_cftime) -> None:
1946+
times = xr.date_range(
1947+
"2000-01-01", freq="6h", periods=10, use_cftime=use_cftime
1948+
)
19521949
array = DataArray(np.arange(10), [("time", times)])
19531950

19541951
# resample to same frequency
@@ -1961,7 +1958,7 @@ def test_resample_first(self) -> None:
19611958

19621959
# verify that labels don't use the first value
19631960
actual = array.resample(time="24h").first()
1964-
expected = DataArray(array.to_series().resample("24h").first())
1961+
expected = array.isel(time=[0, 4, 8])
19651962
assert_identical(expected, actual)
19661963

19671964
# missing values
@@ -1978,10 +1975,17 @@ def test_resample_first(self) -> None:
19781975
# regression test for https://stackoverflow.com/questions/33158558/
19791976
array = Dataset({"time": times})["time"]
19801977
actual = array.resample(time="1D").last()
1981-
expected_times = pd.to_datetime(
1982-
["2000-01-01T18", "2000-01-02T18", "2000-01-03T06"], unit="ns"
1978+
expected = array.isel(time=[3, 7, 9]).assign_coords(time=times[::4])
1979+
assert_identical(expected, actual)
1980+
1981+
# missing periods, GH10169
1982+
actual = array.isel(time=[0, 1, 2, 3, 8, 9]).resample(time="1D").last()
1983+
expected = DataArray(
1984+
np.array([times[3], np.datetime64("NaT"), times[9]]),
1985+
dims="time",
1986+
coords={"time": times[::4]},
1987+
name="time",
19831988
)
1984-
expected = DataArray(expected_times, [("time", times[::4])], name="time")
19851989
assert_identical(expected, actual)
19861990

19871991
def test_resample_bad_resample_dim(self) -> None:
@@ -2298,7 +2302,6 @@ def test_resample_origin(self) -> None:
22982302

22992303

23002304
class TestDatasetResample:
2301-
@pytest.mark.parametrize("use_cftime", [True, False])
23022305
@pytest.mark.parametrize(
23032306
"resample_freq",
23042307
[

0 commit comments

Comments
 (0)