|
92 | 92 | )
|
93 | 93 |
|
94 | 94 |
|
| 95 | +_INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS = [ |
| 96 | + "add_offset", |
| 97 | + "scale_factor", |
| 98 | +] |
| 99 | + |
| 100 | + |
def _is_standard_calendar(calendar: str) -> bool:
    """Return True if ``calendar``, lowercased, is in ``_STANDARD_CALENDARS``."""
    normalized = calendar.lower()
    return normalized in _STANDARD_CALENDARS
|
97 | 103 |
|
@@ -1403,62 +1409,169 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
|
1403 | 1409 | return variable
|
1404 | 1410 |
|
1405 | 1411 |
|
def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool:
    """Report whether the "dtype" entry of a mapping names a timedelta64 dtype.

    Parameters
    ----------
    attrs_or_encoding : dict
        Attribute or encoding dictionary to inspect.

    Returns
    -------
    bool
        True only if the "dtype" entry exists, is a ``str``, and starts with
        "timedelta64"; False otherwise (including for non-string entries).
    """
    candidate = attrs_or_encoding.get("dtype")
    if not isinstance(candidate, str):
        return False
    return candidate.startswith("timedelta64")
| 1415 | + |
| 1416 | + |
class CFTimedeltaCoder(VariableCoder):
    """Coder that converts between timedelta64 data and its CF encoding.

    Parameters
    ----------
    time_unit : PDDatetimeUnitOptions
        Resolution to decode to when decoding is driven by the units
        attribute. Defaults to "ns". Ignored when decoding is driven by the
        dtype attribute, because that attribute carries the resolution.
    decode_via_units : bool
        Whether variables carrying a timedelta-like units attribute, e.g.
        "seconds", are decoded. Defaults to True, but in the future will
        default to False.
    decode_via_dtype : bool
        Whether variables carrying a np.timedelta64 dtype attribute, e.g.
        "timedelta64[s]", are decoded. Defaults to True.
    """

    def __init__(
        self,
        time_unit: PDDatetimeUnitOptions = "ns",
        decode_via_units: bool = True,
        decode_via_dtype: bool = True,
    ) -> None:
        """Store the decoding options; see the class docstring for details."""
        # Read in decode(): when True, the units-based path emits a
        # FutureWarning. It starts out disabled.
        self._emit_decode_timedelta_future_warning = False
        self.decode_via_dtype = decode_via_dtype
        self.decode_via_units = decode_via_units
        self.time_unit = time_unit

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Encode a timedelta64 variable as numbers plus describing attributes.

        Variables whose data is not timedelta64 are returned unchanged.

        Two strategies exist. If the encoding contains "units" and/or a
        "dtype" that is not a "timedelta64[...]" string, the units-based
        strategy is used with those settings. Otherwise the values are encoded
        as int64 in the data's own resolution and a "timedelta64[<resolution>]"
        dtype attribute is written alongside the units attribute.

        Raises
        ------
        ValueError
            If the dtype-attribute strategy is selected while the encoding
            contains 'add_offset' or 'scale_factor'.
        """
        if not np.issubdtype(variable.data.dtype, np.timedelta64):
            return variable

        dims, data, attrs, encoding = unpack_for_encoding(variable)
        has_timedelta_dtype = has_timedelta64_encoding_dtype(encoding)
        use_literal_encoding = has_timedelta_dtype or (
            "units" not in encoding and "dtype" not in encoding
        )

        if use_literal_encoding:
            resolution, _ = np.datetime_data(variable.dtype)
            dtype = np.int64
            units = _numpy_dtype_to_netcdf_timeunit(variable.dtype)
            safe_setitem(attrs, "dtype", f"timedelta64[{resolution}]", name=name)
            # Drop any dtype encoding so that it cannot interfere downstream
            # in NonStringCoder.
            encoding.pop("dtype", None)

            for key in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS:
                if key in encoding:
                    raise ValueError(
                        "Specifying 'add_offset' or 'scale_factor' is not "
                        "supported when encoding the timedelta64 values of "
                        f"variable {name!r} with xarray's new default "
                        f"timedelta64 encoding approach. To encode {name!r} "
                        "with xarray's previous timedelta64 encoding "
                        "approach, which supports the 'add_offset' and "
                        "'scale_factor' parameters, additionally set "
                        "encoding['units'] to a unit of time, e.g. "
                        f"'seconds'. To proceed with encoding of {name!r} "
                        "via xarray's new approach, remove any encoding "
                        "entries for 'add_offset' or 'scale_factor'."
                    )

            # Without a user-provided fill marker, fall back to the smallest
            # int64 value.
            if not ("_FillValue" in encoding or "missing_value" in encoding):
                encoding["_FillValue"] = np.iinfo(np.int64).min
        else:
            dtype = encoding.get("dtype")
            units = encoding.pop("units", None)

            # Packed data has to be encoded into float first; the final dtype
            # is established later by CFScaleOffsetCoder/CFMaskCoder.
            if any(key in encoding for key in ("add_offset", "scale_factor")):
                dtype = data.dtype if data.dtype.kind == "f" else "float64"

        data, units = encode_cf_timedelta(data, units, dtype)
        safe_setitem(attrs, "units", units, name=name)
        return Variable(dims, data, attrs, encoding, fastpath=True)

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Lazily decode a numeric variable into timedelta64 values.

        A variable is decoded via its dtype attribute when it has both a
        timedelta-like units attribute and a "timedelta64..." dtype attribute
        and ``decode_via_dtype`` is set. Otherwise it is decoded via its units
        attribute when that attribute is timedelta-like and
        ``decode_via_units`` is set. Any other variable is returned unchanged.

        Raises
        ------
        ValueError
            If decoding via dtype is selected while the encoding contains
            'add_offset' or 'scale_factor'.
        """
        units = variable.attrs.get("units")
        has_timedelta_units = isinstance(units, str) and units in TIME_UNITS
        has_timedelta_dtype = has_timedelta64_encoding_dtype(variable.attrs)

        via_dtype = (
            has_timedelta_units and has_timedelta_dtype and self.decode_via_dtype
        )
        if not (via_dtype or (has_timedelta_units and self.decode_via_units)):
            return variable

        dims, data, attrs, encoding = unpack_for_decoding(variable)
        units = pop_to(attrs, encoding, "units")

        if via_dtype:
            for key in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS:
                if key in encoding:
                    raise ValueError(
                        "Decoding timedelta64 values via dtype is not "
                        "supported when 'add_offset', or 'scale_factor' are "
                        "present in encoding. Check the encoding parameters "
                        f"of variable {name!r}."
                    )

            dtype = np.dtype(pop_to(attrs, encoding, "dtype", name=name))
            resolution, _ = np.datetime_data(dtype)
            resolution = cast(NPDatetimeUnitOptions, resolution)

            # Resolutions coarser than 's' or finer than 'ns' are clamped to
            # the nearest supported one; ``clamp`` holds the replacement unit
            # and the hazard named in the warning.
            one_step = np.timedelta64(1, resolution)
            if one_step > np.timedelta64(1, "s"):
                clamp = ("s", "an OverflowError")
            elif one_step < np.timedelta64(1, "ns"):
                clamp = ("ns", "loss of precision")
            else:
                clamp = None

            if clamp is None:
                time_unit = cast(PDDatetimeUnitOptions, resolution)
            else:
                target, hazard = clamp
                time_unit = cast(PDDatetimeUnitOptions, target)
                dtype = np.dtype(f"timedelta64[{target}]")
                emit_user_level_warning(
                    "Following pandas, xarray only supports decoding to "
                    "timedelta64 values with a resolution of 's', 'ms', "
                    f"'us', or 'ns'. Encoded values for variable {name!r} "
                    f"have a resolution of {resolution!r}. Attempting to "
                    f"decode to a resolution of '{target}'. Note, depending on "
                    f"the encoded values, this may lead to {hazard}. "
                    "Additionally, data will not be identically round "
                    "tripped; xarray will choose an encoding dtype of "
                    f"'timedelta64[{target}]' when re-encoding."
                )
        else:
            # Reaching this point without the dtype path means the units
            # attribute is timedelta-like and decode_via_units is set.
            if self._emit_decode_timedelta_future_warning:
                emit_user_level_warning(
                    "In a future version, xarray will not decode "
                    "timedelta values based on the presence of a "
                    "timedelta-like units attribute by default. Instead "
                    "it will rely on the presence of a timedelta64 dtype "
                    "attribute, which is now xarray's default way of "
                    "encoding timedelta64 values. To continue decoding "
                    "timedeltas based on the presence of a timedelta-like "
                    "units attribute, users will need to explicitly "
                    "opt-in by passing True or "
                    "CFTimedeltaCoder(decode_via_units=True) to "
                    "decode_timedelta. To silence this warning, set "
                    "decode_timedelta to True, False, or a "
                    "'CFTimedeltaCoder' instance.",
                    FutureWarning,
                )
            dtype = np.dtype(f"timedelta64[{self.time_unit}]")
            time_unit = self.time_unit

        transform = partial(decode_cf_timedelta, units=units, time_unit=time_unit)
        data = lazy_elemwise_func(data, transform, dtype=dtype)
        return Variable(dims, data, attrs, encoding, fastpath=True)
|
0 commit comments