Skip to content

Commit c4b3f1c

Browse files
fix roundtripping zero-size timedelta arrays (#10313)
* fix roundtripping zero-size timedelta arrays
* More descriptive variable name, add whats-new.rst
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 4259dfa commit c4b3f1c

File tree

3 files changed

+36
-7
lines changed

3 files changed

+36
-7
lines changed

doc/whats-new.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,15 @@ Bug fixes
4242
~~~~~~~~~
4343
- Fix :py:class:`~xarray.groupers.BinGrouper` when ``labels`` is not specified (:issue:`10284`).
4444
By `Deepak Cherian <https://github.com/dcherian>`_.
45-
4645
- Allow accessing arbitrary attributes on Pandas ExtensionArrays.
4746
By `Deepak Cherian <https://github.com/dcherian>`_.
48-
- Use dtype from intermediate sum instead of source dtype or "int" for casting of count when calculating mean in rolling for correct operations (preserve float dtypes, correct mean of bool arrays) (:issue:`10340`, :pull:`10341`).
49-
47+
- Fix encoding and decoding of empty (zero-size) timedelta64 arrays: ``units`` takes precedence when encoding,
48+
and default values are used as a fallback when decoding (:issue:`10310`, :pull:`10313`).
49+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
50+
- Use dtype from intermediate sum instead of source dtype or "int" for casting of count when
51+
calculating mean in rolling for correct operations (preserve float dtypes,
52+
correct mean of bool arrays) (:issue:`10340`, :pull:`10341`).
53+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
5054

5155
Documentation
5256
~~~~~~~~~~~~~

xarray/coding/times.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -659,22 +659,28 @@ def decode_cf_timedelta(
659659
num_timedeltas = to_numpy(num_timedeltas)
660660
unit = _netcdf_to_numpy_timeunit(units)
661661

662+
# special case empty arrays
663+
is_empty_array = num_timedeltas.size == 0
664+
662665
with warnings.catch_warnings():
663666
warnings.filterwarnings("ignore", "All-NaN slice encountered", RuntimeWarning)
664-
_check_timedelta_range(np.nanmin(num_timedeltas), unit, time_unit)
665-
_check_timedelta_range(np.nanmax(num_timedeltas), unit, time_unit)
667+
if not is_empty_array:
668+
_check_timedelta_range(np.nanmin(num_timedeltas), unit, time_unit)
669+
_check_timedelta_range(np.nanmax(num_timedeltas), unit, time_unit)
666670

667671
timedeltas = _numbers_to_timedelta(
668672
num_timedeltas, unit, "s", "timedeltas", target_unit=time_unit
669673
)
670674
pd_timedeltas = pd.to_timedelta(ravel(timedeltas))
671675

672-
if np.isnat(timedeltas).all():
676+
if not is_empty_array and np.isnat(timedeltas).all():
673677
empirical_unit = time_unit
674678
else:
675679
empirical_unit = pd_timedeltas.unit
676680

677-
if np.timedelta64(1, time_unit) > np.timedelta64(1, empirical_unit):
681+
if is_empty_array or np.timedelta64(1, time_unit) > np.timedelta64(
682+
1, empirical_unit
683+
):
678684
time_unit = empirical_unit
679685

680686
if time_unit not in {"s", "ms", "us", "ns"}:
@@ -1230,6 +1236,9 @@ def _eagerly_encode_cf_timedelta(
12301236
data_units = infer_timedelta_units(timedeltas)
12311237
if units is None:
12321238
units = data_units
1239+
# units take precedence in the case of zero-size array
1240+
if timedeltas.size == 0:
1241+
data_units = units
12331242

12341243
time_delta = _unit_timedelta_numpy(units)
12351244
time_deltas = pd.TimedeltaIndex(ravel(timedeltas))

xarray/tests/test_coding_times.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,3 +1959,19 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None:
19591959
decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True)
19601960
with assert_no_warnings():
19611961
decoded.load()
1962+
1963+
1964+
def test_roundtrip_0size_timedelta(time_unit: PDDatetimeUnitOptions) -> None:
1965+
# regression test for GitHub issue #10310
1966+
encoding = {"units": "days", "dtype": np.dtype("int64")}
1967+
data = np.array([], dtype=f"=m8[{time_unit}]")
1968+
decoded = Variable(["time"], data, encoding=encoding)
1969+
encoded = conventions.encode_cf_variable(decoded, name="foo")
1970+
assert encoded.dtype == encoding["dtype"]
1971+
assert encoded.attrs["units"] == encoding["units"]
1972+
decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True)
1973+
assert decoded.dtype == np.dtype("=m8[ns]")
1974+
with assert_no_warnings():
1975+
decoded.load()
1976+
assert decoded.dtype == np.dtype("=m8[s]")
1977+
assert decoded.encoding == encoding

0 commit comments

Comments
 (0)