@@ -249,6 +249,38 @@ def __init__(
249
249
children = children ,
250
250
)
251
251
252
def _clear_cache(self) -> None:
    """Clear the parent's cache, then drop the cached datetime attributes.

    Each name listed below is removed from the instance with ``delattr``;
    names that cannot be removed (``AttributeError``) are skipped.
    """
    super()._clear_cache()
    for cached_name in (
        "days_in_month",
        "is_year_start",
        "is_leap_year",
        "is_year_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_month_start",
        "is_month_end",
        "day_of_year",
        "weekday",
        "nanosecond",
        "microsecond",
        "millisecond",
        "second",
        "minute",
        "hour",
        "day",
        "month",
        "year",
        "quarter",
        "time_unit",
    ):
        try:
            delattr(self, cached_name)
        except AttributeError:
            # No value is stored under this name yet, so there is
            # nothing to invalidate.
            continue
283
+
252
284
@staticmethod
253
285
def _validate_dtype_instance (dtype : np .dtype ) -> np .dtype :
254
286
if not (isinstance (dtype , np .dtype ) and dtype .kind == "M" ):
@@ -287,86 +319,86 @@ def _validate_fillna_value(
287
319
def time_unit (self ) -> str :
288
320
return np .datetime_data (self .dtype )[0 ]
289
321
290
@functools.cached_property
@acquire_spill_lock()
def quarter(self) -> ColumnBase:
    """Return ``plc.datetime.extract_quarter`` of this column as a column.

    The value is cached on the instance after the first access.
    """
    plc_column = self.to_pylibcudf(mode="read")
    plc_quarters = plc.datetime.extract_quarter(plc_column)
    return type(self).from_pylibcudf(plc_quarters)
296
328
297
@functools.cached_property
def year(self) -> ColumnBase:
    """Extract the ``YEAR`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.YEAR
    return self._get_dt_field(component)
300
332
301
@functools.cached_property
def month(self) -> ColumnBase:
    """Extract the ``MONTH`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.MONTH
    return self._get_dt_field(component)
304
336
305
@functools.cached_property
def day(self) -> ColumnBase:
    """Extract the ``DAY`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.DAY
    return self._get_dt_field(component)
308
340
309
@functools.cached_property
def hour(self) -> ColumnBase:
    """Extract the ``HOUR`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.HOUR
    return self._get_dt_field(component)
312
344
313
@functools.cached_property
def minute(self) -> ColumnBase:
    """Extract the ``MINUTE`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.MINUTE
    return self._get_dt_field(component)
316
348
317
@functools.cached_property
def second(self) -> ColumnBase:
    """Extract the ``SECOND`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.SECOND
    return self._get_dt_field(component)
320
352
321
@functools.cached_property
def millisecond(self) -> ColumnBase:
    """Extract the ``MILLISECOND`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.MILLISECOND
    return self._get_dt_field(component)
324
356
325
@functools.cached_property
def microsecond(self) -> ColumnBase:
    """Extract the ``MICROSECOND`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.MICROSECOND
    return self._get_dt_field(component)
328
360
329
@functools.cached_property
def nanosecond(self) -> ColumnBase:
    """Extract the ``NANOSECOND`` datetime component via ``_get_dt_field``.

    The value is cached on the instance after the first access.
    """
    component = plc.datetime.DatetimeComponent.NANOSECOND
    return self._get_dt_field(component)
332
364
333
@functools.cached_property
def weekday(self) -> ColumnBase:
    """Extract the ``WEEKDAY`` component, shifted down by one.

    The value is cached on the instance after the first access.
    """
    # libcudf numbers Monday-Sunday as 1-7, whereas pandas numbers
    # them 0-6, so subtract one (in the result's own dtype).
    libcudf_weekday = self._get_dt_field(
        plc.datetime.DatetimeComponent.WEEKDAY
    )
    one = libcudf_weekday.dtype.type(1)
    return libcudf_weekday - one
339
371
340
@functools.cached_property
@acquire_spill_lock()
def day_of_year(self) -> ColumnBase:
    """Return ``plc.datetime.day_of_year`` of this column as a column.

    The value is cached on the instance after the first access.
    """
    plc_column = self.to_pylibcudf(mode="read")
    plc_result = plc.datetime.day_of_year(plc_column)
    return type(self).from_pylibcudf(plc_result)
346
378
347
@functools.cached_property
def is_month_start(self) -> ColumnBase:
    """Flag entries whose ``day`` equals 1, with nulls filled as False.

    The value is cached on the instance after the first access.
    """
    on_first_day = self.day == 1
    return on_first_day.fillna(False)
350
382
351
@functools.cached_property
def is_month_end(self) -> ColumnBase:
    """Flag entries whose ``day`` matches the day of ``last_day_of_month``.

    Nulls in the comparison are filled with False. The value is cached on
    the instance after the first access.
    """
    with acquire_spill_lock():
        plc_last_day = plc.datetime.last_day_of_month(
            self.to_pylibcudf(mode="read")
        )
        last_day_col = type(self).from_pylibcudf(plc_last_day)
    on_last_day = self.day == last_day_col.day  # type: ignore[attr-defined]
    return on_last_day.fillna(False)
358
390
359
@functools.cached_property
def is_quarter_end(self) -> ColumnBase:
    """Flag entries that are a month end in month 3, 6, 9 or 12.

    Nulls in the combined mask are filled with False. The value is cached
    on the instance after the first access.
    """
    closing_months = [3, 6, 9, 12]
    in_closing_month = self.month.isin(closing_months)
    combined = self.is_month_end & in_closing_month
    return combined.fillna(False)
363
395
364
@functools.cached_property
def is_quarter_start(self) -> ColumnBase:
    """Flag entries that are a month start in month 1, 4, 7 or 10.

    Nulls in the combined mask are filled with False. The value is cached
    on the instance after the first access.
    """
    opening_months = [1, 4, 7, 10]
    in_opening_month = self.month.isin(opening_months)
    combined = self.is_month_start & in_opening_month
    return combined.fillna(False)
368
400
369
- @property
401
+ @functools . cached_property
370
402
def is_year_end (self ) -> ColumnBase :
371
403
day_of_year = self .day_of_year
372
404
leap_dates = self .is_leap_year
@@ -375,18 +407,18 @@ def is_year_end(self) -> ColumnBase:
375
407
non_leap = day_of_year == 365
376
408
return leap .copy_if_else (non_leap , leap_dates ).fillna (False )
377
409
378
@functools.cached_property
@acquire_spill_lock()
def is_leap_year(self) -> ColumnBase:
    """Return ``plc.datetime.is_leap_year`` of this column as a column.

    The value is cached on the instance after the first access.
    """
    plc_column = self.to_pylibcudf(mode="read")
    plc_flags = plc.datetime.is_leap_year(plc_column)
    return type(self).from_pylibcudf(plc_flags)
384
416
385
@functools.cached_property
def is_year_start(self) -> ColumnBase:
    """Flag entries whose ``day_of_year`` equals 1, with nulls as False.

    The value is cached on the instance after the first access.
    """
    on_first_day = self.day_of_year == 1
    return on_first_day.fillna(False)
388
420
389
- @property
421
+ @functools . cached_property
390
422
@acquire_spill_lock ()
391
423
def days_in_month (self ) -> ColumnBase :
392
424
return type (self ).from_pylibcudf (
@@ -417,7 +449,7 @@ def values(self):
417
449
Return a CuPy representation of the DateTimeColumn.
418
450
"""
419
451
raise NotImplementedError (
420
- "DateTime Arrays is not yet implemented in cudf "
452
+ "DateTime Arrays is not yet implemented in cupy "
421
453
)
422
454
423
455
def element_indexing (self , index : int ):
@@ -922,12 +954,12 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
922
954
else :
923
955
return False
924
956
925
- def _with_type_metadata (self , dtype ):
957
+ def _with_type_metadata (self , dtype ) -> DatetimeColumn :
926
958
if isinstance (dtype , pd .DatetimeTZDtype ):
927
959
return DatetimeTZColumn (
928
- data = self .base_data ,
960
+ data = self .base_data , # type: ignore[arg-type]
929
961
dtype = dtype ,
930
- mask = self .base_mask ,
962
+ mask = self .base_mask , # type: ignore[arg-type]
931
963
size = self .size ,
932
964
offset = self .offset ,
933
965
null_count = self .null_count ,
@@ -1003,7 +1035,7 @@ def tz_localize(
1003
1035
tz : str | None ,
1004
1036
ambiguous : Literal ["NaT" ] = "NaT" ,
1005
1037
nonexistent : Literal ["NaT" ] = "NaT" ,
1006
- ):
1038
+ ) -> DatetimeColumn :
1007
1039
if tz is None :
1008
1040
return self .copy ()
1009
1041
ambiguous , nonexistent = check_ambiguous_and_nonexistent (
@@ -1087,6 +1119,13 @@ def __init__(
1087
1119
children = children ,
1088
1120
)
1089
1121
1122
def _clear_cache(self) -> None:
    """Clear the parent's cache, then drop the cached ``_local_time``."""
    super()._clear_cache()
    try:
        delattr(self, "_local_time")
    except AttributeError:
        # ``_local_time`` holds no cached value on this instance, so
        # there is nothing to remove.
        return
1128
+
1090
1129
@staticmethod
1091
1130
def _validate_dtype_instance (
1092
1131
dtype : pd .DatetimeTZDtype ,
@@ -1118,25 +1157,24 @@ def time_unit(self) -> str:
1118
1157
return self .dtype .unit
1119
1158
1120
1159
@property
def _utc_time(self) -> DatetimeColumn:
    """Return UTC time as naive timestamps.

    Builds a ``DatetimeColumn`` from this column's base data and mask,
    using the dtype returned by ``_get_base_dtype(self.dtype)``.
    """
    data_buffer = self.base_data
    naive_dtype = _get_base_dtype(self.dtype)
    mask_buffer = self.base_mask
    return DatetimeColumn(
        data=data_buffer,  # type: ignore[arg-type]
        dtype=naive_dtype,
        mask=mask_buffer,  # type: ignore[arg-type]
        size=self.size,
        offset=self.offset,
        null_count=self.null_count,
    )
1131
1170
1132
- @property
1133
- def _local_time (self ):
1171
+ @functools . cached_property
1172
+ def _local_time (self ) -> DatetimeColumn :
1134
1173
"""Return the local time as naive timestamps."""
1135
1174
transition_times , offsets = get_tz_data (str (self .dtype .tz ))
1136
1175
base_dtype = _get_base_dtype (self .dtype )
1137
- transition_times = transition_times .astype (base_dtype )
1138
1176
indices = (
1139
- transition_times .searchsorted (
1177
+ transition_times .astype ( base_dtype ). searchsorted (
1140
1178
self .astype (base_dtype ), side = "right"
1141
1179
)
1142
1180
- 1
@@ -1173,7 +1211,7 @@ def _get_dt_field(
1173
1211
)
1174
1212
)
1175
1213
1176
- def __repr__ (self ):
1214
+ def __repr__ (self ) -> str :
1177
1215
# Arrow prints the UTC timestamps, but we want to print the
1178
1216
# local timestamps:
1179
1217
arr = self ._local_time .to_arrow ().cast (
@@ -1183,7 +1221,9 @@ def __repr__(self):
1183
1221
f"{ object .__repr__ (self )} \n { arr .to_string ()} \n dtype: { self .dtype } "
1184
1222
)
1185
1223
1186
- def tz_localize (self , tz : str | None , ambiguous = "NaT" , nonexistent = "NaT" ):
1224
+ def tz_localize (
1225
+ self , tz : str | None , ambiguous = "NaT" , nonexistent = "NaT"
1226
+ ) -> DatetimeColumn :
1187
1227
if tz is None :
1188
1228
return self ._local_time
1189
1229
ambiguous , nonexistent = check_ambiguous_and_nonexistent (
@@ -1194,14 +1234,14 @@ def tz_localize(self, tz: str | None, ambiguous="NaT", nonexistent="NaT"):
1194
1234
"Use `tz_convert` to convert between time zones."
1195
1235
)
1196
1236
1197
- def tz_convert (self , tz : str | None ):
1237
+ def tz_convert (self , tz : str | None ) -> DatetimeColumn :
1198
1238
if tz is None :
1199
1239
return self ._utc_time
1200
1240
elif tz == str (self .dtype .tz ):
1201
1241
return self .copy ()
1202
1242
utc_time = self ._utc_time
1203
1243
return type (self )(
1204
- data = utc_time .base_data ,
1244
+ data = utc_time .base_data , # type: ignore[arg-type]
1205
1245
dtype = pd .DatetimeTZDtype (self .time_unit , tz ),
1206
1246
mask = utc_time .base_mask ,
1207
1247
size = utc_time .size ,
0 commit comments