Skip to content

Commit df2ecf4

Browse files
authored
use mean of min/max years as offset in calculation of datetime64 mean (#10035)
* use mean of min/max years as offset in calculation of datetime64 mean * reinstate _datetime_nanmin as it is used downstream in flox<0.10.0 * add whats-new.rst entry * add whats-new.rst entry
1 parent d57f05c commit df2ecf4

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ Bug fixes
3939
"nanoseconds" were chosen by default, which are optimal for
4040
nanosecond-resolution times, but not for times with coarser resolution. By
4141
`Spencer Clark <https://github.com/spencerkclark>`_ (:pull:`10017`).
42+
- Use mean of min/max years as offset in calculation of datetime64 mean
43+
(:issue:`10019`, :pull:`10035`).
44+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
4245

4346

4447
Documentation

xarray/core/duck_array_ops.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,11 @@ def array_any(array, axis=None, keepdims=False, **kwargs):
550550

551551

552552
def _datetime_nanmin(array):
553-
"""nanmin() function for datetime64.
553+
return _datetime_nanreduce(array, min)
554+
555+
556+
def _datetime_nanreduce(array, func):
557+
"""nanreduce() function for datetime64.
554558
555559
Caveats that this function deals with:
556560
@@ -562,7 +566,7 @@ def _datetime_nanmin(array):
562566
assert dtypes.is_datetime_like(dtype)
563567
# (NaT).astype(float) does not produce NaN...
564568
array = where(pandas_isnull(array), np.nan, array.astype(float))
565-
array = min(array, skipna=True)
569+
array = func(array, skipna=True)
566570
if isinstance(array, float):
567571
array = np.array(array)
568572
# ...but (NaN).astype("M8") does produce NaT
@@ -597,7 +601,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
597601
# Set offset to minimum if not given
598602
if offset is None:
599603
if dtypes.is_datetime_like(array.dtype):
600-
offset = _datetime_nanmin(array)
604+
offset = _datetime_nanreduce(array, min)
601605
else:
602606
offset = min(array)
603607

@@ -717,8 +721,11 @@ def mean(array, axis=None, skipna=None, **kwargs):
717721

718722
array = asarray(array)
719723
if dtypes.is_datetime_like(array.dtype):
720-
offset = _datetime_nanmin(array)
721-
724+
dmin = _datetime_nanreduce(array, min).astype("datetime64[Y]").astype(int)
725+
dmax = _datetime_nanreduce(array, max).astype("datetime64[Y]").astype(int)
726+
offset = (
727+
np.array((dmin + dmax) // 2).astype("datetime64[Y]").astype(array.dtype)
728+
)
722729
# From version 2025.01.2 xarray uses np.datetime64[unit], where unit
723730
# is one of "s", "ms", "us", "ns".
724731
# To not have to worry about the resolution, we just convert the output

xarray/tests/test_duck_array_ops.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,19 @@ def test_cftime_datetime_mean(dask):
481481
assert_equal(result, expected)
482482

483483

484+
@pytest.mark.parametrize("dask", [False, True])
485+
def test_mean_over_long_spanning_datetime64(dask) -> None:
486+
if dask and not has_dask:
487+
pytest.skip("requires dask")
488+
array = np.array(["1678-01-01", "NaT", "2260-01-01"], dtype="datetime64[ns]")
489+
da = DataArray(array, dims=["time"])
490+
if dask:
491+
da = da.chunk({"time": 2})
492+
expected = DataArray(np.array("1969-01-01", dtype="datetime64[ns]"))
493+
result = da.mean()
494+
assert_equal(result, expected)
495+
496+
484497
@requires_cftime
485498
@requires_dask
486499
def test_mean_over_non_time_dim_of_dataset_with_dask_backed_cftime_data():

0 commit comments

Comments
 (0)