fix roundtripping zero-size timedelta arrays (#10313)

kmuehlbauer · pre-commit-ci[bot] · web-flow · commit c4b3f1cb27d8 · 2025-05-23T12:34:37.000+02:00
* fix roundtripping zero-size timedelta arrays * More speaking variable, add whats-new.rst * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -42,11 +42,15 @@ Bug fixes
 ~~~~~~~~~
 - Fix :py:class:`~xarray.groupers.BinGrouper` when ``labels`` is not specified (:issue:`10284`).
   By `Deepak Cherian <https://github.com/dcherian>`_.
-
 - Allow accessing arbitrary attributes on Pandas ExtensionArrays.
   By `Deepak Cherian <https://github.com/dcherian>`_.
-- Use dtype from intermediate sum instead of source dtype or "int" for casting of count when calculating mean in rolling for correct operations (preserve float dtypes, correct mean of bool arrays) (:issue:`10340`, :pull:`10341`).
-
+- Fix coding empty (zero-size) timedelta64 arrays, ``units`` taking precedence when encoding,
+  fallback to default values when decoding (:issue:`10310`, :pull:`10313`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+- Use dtype from intermediate sum instead of source dtype or "int" for casting of count when
+  calculating mean in rolling for correct operations (preserve float dtypes,
+  correct mean of bool arrays) (:issue:`10340`, :pull:`10341`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
@@ -659,22 +659,28 @@ def decode_cf_timedelta(
     num_timedeltas = to_numpy(num_timedeltas)
     unit = _netcdf_to_numpy_timeunit(units)
 
+    # special case empty arrays
+    is_empty_array = num_timedeltas.size == 0
+
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", "All-NaN slice encountered", RuntimeWarning)
-        _check_timedelta_range(np.nanmin(num_timedeltas), unit, time_unit)
-        _check_timedelta_range(np.nanmax(num_timedeltas), unit, time_unit)
+        if not is_empty_array:
+            _check_timedelta_range(np.nanmin(num_timedeltas), unit, time_unit)
+            _check_timedelta_range(np.nanmax(num_timedeltas), unit, time_unit)
 
     timedeltas = _numbers_to_timedelta(
         num_timedeltas, unit, "s", "timedeltas", target_unit=time_unit
     )
     pd_timedeltas = pd.to_timedelta(ravel(timedeltas))
 
-    if np.isnat(timedeltas).all():
+    if not is_empty_array and np.isnat(timedeltas).all():
         empirical_unit = time_unit
     else:
         empirical_unit = pd_timedeltas.unit
 
-    if np.timedelta64(1, time_unit) > np.timedelta64(1, empirical_unit):
+    if is_empty_array or np.timedelta64(1, time_unit) > np.timedelta64(
+        1, empirical_unit
+    ):
         time_unit = empirical_unit
 
     if time_unit not in {"s", "ms", "us", "ns"}:
@@ -1230,6 +1236,9 @@ def _eagerly_encode_cf_timedelta(
     data_units = infer_timedelta_units(timedeltas)
     if units is None:
         units = data_units
+    # units take precedence in the case of zero-size array
+    if timedeltas.size == 0:
+        data_units = units
 
     time_delta = _unit_timedelta_numpy(units)
     time_deltas = pd.TimedeltaIndex(ravel(timedeltas))
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
@@ -1959,3 +1959,19 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None:
     decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True)
     with assert_no_warnings():
         decoded.load()
+
+
+def test_roundtrip_0size_timedelta(time_unit: PDDatetimeUnitOptions) -> None:
+    # regression test for GitHub issue #10310
+    encoding = {"units": "days", "dtype": np.dtype("int64")}
+    data = np.array([], dtype=f"=m8[{time_unit}]")
+    decoded = Variable(["time"], data, encoding=encoding)
+    encoded = conventions.encode_cf_variable(decoded, name="foo")
+    assert encoded.dtype == encoding["dtype"]
+    assert encoded.attrs["units"] == encoding["units"]
+    decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True)
+    assert decoded.dtype == np.dtype("=m8[ns]")
+    with assert_no_warnings():
+        decoded.load()
+    assert decoded.dtype == np.dtype("=m8[s]")
+    assert decoded.encoding == encoding