Skip to content

Commit 2553762

Browse files
spencerkclark, pre-commit-ci[bot], and dcherian
authored
Enable taking the mean of dask-backed cftime arrays (#6940)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent abe1e61 commit 2553762

File tree

4 files changed

+62
-25
lines changed

4 files changed

+62
-25
lines changed

ci/requirements/min-all-deps.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ dependencies:
1515
- cfgrib=0.9
1616
- cftime=1.4
1717
- coveralls
18-
- dask-core=2021.04
19-
- distributed=2021.04
18+
- dask-core=2021.08.0
19+
- distributed=2021.08.0
2020
- flox=0.5
2121
- h5netcdf=0.11
2222
- h5py=3.1

doc/whats-new.rst

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,10 @@ v2022.07.0 (unreleased)
2121

2222
New Features
2323
~~~~~~~~~~~~
24-
24+
- Enable taking the mean of dask-backed :py:class:`cftime.datetime` arrays
25+
(:pull:`6556`, :pull:`6940`). By `Deepak Cherian
26+
<https://github.com/dcherian>`_ and `Spencer Clark
27+
<https://github.com/spencerkclark>`_.
2528

2629
Breaking changes
2730
~~~~~~~~~~~~~~~~
@@ -53,6 +56,9 @@ Bug fixes
5356
By `Oliver Lopez <https://github.com/lopezvoliver>`_.
5457
- Fix bug where index variables would be changed inplace (:issue:`6931`, :pull:`6938`)
5558
By `Michael Niklas <https://github.com/headtr1ck>`_.
59+
- Allow taking the mean over non-time dimensions of datasets containing
60+
dask-backed cftime arrays (:issue:`5897`, :pull:`6950`). By `Spencer Clark
61+
<https://github.com/spencerkclark>`_.
5662
- Harmonize returned multi-indexed indexes when applying ``concat`` along new dimension (:issue:`6881`, :pull:`6889`)
5763
By `Fabian Hofmann <https://github.com/FabianHofmann>`_.
5864
- Fix step plots with ``hue`` arg. (:pull:`6944`)

xarray/core/duck_array_ops.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -426,7 +426,6 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
426426
though some calendars would allow for them (e.g. no_leap). This is because there
427427
is no `cftime.timedelta` object.
428428
"""
429-
# TODO: make this function dask-compatible?
430429
# Set offset to minimum if not given
431430
if offset is None:
432431
if array.dtype.kind in "Mm":
@@ -531,7 +530,10 @@ def pd_timedelta_to_float(value, datetime_unit):
531530

532531

533532
def _timedelta_to_seconds(array):
534-
return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6
533+
if isinstance(array, datetime.timedelta):
534+
return array.total_seconds() * 1e6
535+
else:
536+
return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6
535537

536538

537539
def py_timedelta_to_float(array, datetime_unit):
@@ -565,12 +567,6 @@ def mean(array, axis=None, skipna=None, **kwargs):
565567
+ offset
566568
)
567569
elif _contains_cftime_datetimes(array):
568-
if is_duck_dask_array(array):
569-
raise NotImplementedError(
570-
"Computing the mean of an array containing "
571-
"cftime.datetime objects is not yet implemented on "
572-
"dask arrays."
573-
)
574570
offset = min(array)
575571
timedeltas = datetime_to_numeric(array, offset, datetime_unit="us")
576572
mean_timedeltas = _mean(timedeltas, axis=axis, skipna=skipna, **kwargs)

xarray/tests/test_duck_array_ops.py

Lines changed: 49 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -323,18 +323,51 @@ def test_datetime_mean(dask: bool) -> None:
323323

324324

325325
@requires_cftime
326-
def test_cftime_datetime_mean():
326+
@pytest.mark.parametrize("dask", [False, True])
327+
def test_cftime_datetime_mean(dask):
328+
if dask and not has_dask:
329+
pytest.skip("requires dask")
330+
327331
times = cftime_range("2000", periods=4)
328332
da = DataArray(times, dims=["time"])
333+
da_2d = DataArray(times.values.reshape(2, 2))
329334

330-
assert da.isel(time=0).mean() == da.isel(time=0)
335+
if dask:
336+
da = da.chunk({"time": 2})
337+
da_2d = da_2d.chunk({"dim_0": 2})
338+
339+
expected = da.isel(time=0)
340+
# one compute needed to check the array contains cftime datetimes
341+
with raise_if_dask_computes(max_computes=1):
342+
result = da.isel(time=0).mean()
343+
assert_dask_array(result, dask)
344+
assert_equal(result, expected)
331345

332346
expected = DataArray(times.date_type(2000, 1, 2, 12))
333-
result = da.mean()
347+
with raise_if_dask_computes(max_computes=1):
348+
result = da.mean()
349+
assert_dask_array(result, dask)
334350
assert_equal(result, expected)
335351

336-
da_2d = DataArray(times.values.reshape(2, 2))
337-
result = da_2d.mean()
352+
with raise_if_dask_computes(max_computes=1):
353+
result = da_2d.mean()
354+
assert_dask_array(result, dask)
355+
assert_equal(result, expected)
356+
357+
358+
@requires_cftime
359+
@requires_dask
360+
def test_mean_over_non_time_dim_of_dataset_with_dask_backed_cftime_data():
361+
# Regression test for part two of GH issue 5897: averaging over a non-time
362+
# dimension still fails if the time variable is dask-backed.
363+
ds = Dataset(
364+
{
365+
"var1": (("time",), cftime_range("2021-10-31", periods=10, freq="D")),
366+
"var2": (("x",), list(range(10))),
367+
}
368+
)
369+
expected = ds.mean("x")
370+
result = ds.chunk({}).mean("x")
338371
assert_equal(result, expected)
339372

340373

@@ -372,15 +405,6 @@ def test_cftime_datetime_mean_long_time_period():
372405
assert_equal(result, expected)
373406

374407

375-
@requires_cftime
376-
@requires_dask
377-
def test_cftime_datetime_mean_dask_error():
378-
times = cftime_range("2000", periods=4)
379-
da = DataArray(times, dims=["time"]).chunk()
380-
with pytest.raises(NotImplementedError):
381-
da.mean()
382-
383-
384408
def test_empty_axis_dtype():
385409
ds = Dataset()
386410
ds["pos"] = [1, 2, 3]
@@ -742,6 +766,17 @@ def test_datetime_to_numeric_cftime(dask):
742766
expected = 24 * np.arange(0, 35, 7).astype(dtype)
743767
np.testing.assert_array_equal(result, expected)
744768

769+
with raise_if_dask_computes():
770+
if dask:
771+
time = dask.array.asarray(times[1])
772+
else:
773+
time = np.asarray(times[1])
774+
result = duck_array_ops.datetime_to_numeric(
775+
time, offset=times[0], datetime_unit="h", dtype=int
776+
)
777+
expected = np.array(24 * 7).astype(int)
778+
np.testing.assert_array_equal(result, expected)
779+
745780

746781
@requires_cftime
747782
def test_datetime_to_numeric_potential_overflow():

0 commit comments

Comments
 (0)