@@ -238,10 +238,10 @@ std::shared_ptr<arrow::Array> ArrowStringAsYqlTimestamp(const std::shared_ptr<ar
238
238
return builder.Build (true ).make_array ();
239
239
}
240
240
241
- template <bool isOptional>
242
- std::shared_ptr<arrow::Array> ArrowDate32AsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, const std::shared_ptr<arrow::Array>& value) {
241
+ template <bool isOptional, typename TArrowType >
242
+ std::shared_ptr<arrow::Array> ArrowTypeAsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, const std::shared_ptr<arrow::Array>& value) {
243
243
::NYql::NUdf::TFixedSizeArrayBuilder<ui16, isOptional> builder (NKikimr::NMiniKQL::TTypeInfoHelper (), targetType, *arrow::system_memory_pool (), value->length ());
244
- ::NYql::NUdf::TFixedSizeBlockReader<i32 , isOptional> reader;
244
+ ::NYql::NUdf::TFixedSizeBlockReader<TArrowType , isOptional> reader;
245
245
for (i64 i = 0 ; i < value->length (); ++i) {
246
246
const NUdf::TBlockItem item = reader.GetItem (*value->data (), i);
247
247
if constexpr (isOptional) {
@@ -253,7 +253,7 @@ std::shared_ptr<arrow::Array> ArrowDate32AsYqlDate(const std::shared_ptr<arrow::
253
253
throw parquet::ParquetException (TStringBuilder () << " null value for date could not be represented in non-optional type" );
254
254
}
255
255
256
- const i32 v = item.As <i32 >();
256
+ const TArrowType v = item.As <TArrowType >();
257
257
if (v < 0 || v > ::NYql::NUdf::MAX_DATE) {
258
258
throw parquet::ParquetException (TStringBuilder () << " date in parquet is out of range [0, " << ::NYql::NUdf::MAX_DATE << " ]: " << v);
259
259
}
@@ -262,6 +262,31 @@ std::shared_ptr<arrow::Array> ArrowDate32AsYqlDate(const std::shared_ptr<arrow::
262
262
return builder.Build (true ).make_array ();
263
263
}
264
264
265
+ template <bool isOptional>
266
+ std::shared_ptr<arrow::Array> ArrowStringAsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, const std::shared_ptr<arrow::Array>& value, const NDB::FormatSettings& formatSettings) {
267
+ ::NYql::NUdf::TFixedSizeArrayBuilder<ui32, isOptional> builder (NKikimr::NMiniKQL::TTypeInfoHelper (), targetType, *arrow::system_memory_pool (), value->length ());
268
+ ::NYql::NUdf::TStringBlockReader<arrow::BinaryType, isOptional, NKikimr::NUdf::EDataSlot::String> reader;
269
+ for (i64 i = 0 ; i < value->length (); ++i) {
270
+ NUdf::TBlockItem item = reader.GetItem (*value->data (), i);
271
+
272
+ if constexpr (isOptional) {
273
+ if (!item) {
274
+ builder.Add (item);
275
+ continue ;
276
+ }
277
+ } else if (!item) {
278
+ throw parquet::ParquetException (TStringBuilder () << " null value for date could not be represented in non-optional type" );
279
+ }
280
+
281
+ auto ref = item.AsStringRef ();
282
+ NDB::ReadBufferFromMemory rb{ref.Data (), ref.Size ()};
283
+ uint16_t result = 0 ;
284
+ parseImpl<NDB::DataTypeDate>(result, rb, nullptr , formatSettings);
285
+ builder.Add (NUdf::TBlockItem (static_cast <ui16>(result)));
286
+ }
287
+ return builder.Build (true ).make_array ();
288
+ }
289
+
265
290
TColumnConverter ArrowUInt32AsYqlDatetime (const std::shared_ptr<arrow::DataType>& targetType, bool isOptional) {
266
291
return [targetType, isOptional](const std::shared_ptr<arrow::Array>& value) {
267
292
return isOptional
@@ -430,14 +455,62 @@ TColumnConverter ArrowDate32AsYqlString(const std::shared_ptr<arrow::DataType>&
430
455
};
431
456
}
432
457
458
+ TColumnConverter ArrowUInt16AsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, bool isOptional) {
459
+ return [targetType, isOptional](const std::shared_ptr<arrow::Array>& value) {
460
+ return isOptional
461
+ ? ArrowTypeAsYqlDate<true , ui16>(targetType, value)
462
+ : ArrowTypeAsYqlDate<false , ui16>(targetType, value);
463
+ };
464
+ }
465
+
466
+ TColumnConverter ArrowInt32AsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, bool isOptional) {
467
+ return [targetType, isOptional](const std::shared_ptr<arrow::Array>& value) {
468
+ return isOptional
469
+ ? ArrowTypeAsYqlDate<true , i32 >(targetType, value)
470
+ : ArrowTypeAsYqlDate<false , i32 >(targetType, value);
471
+ };
472
+ }
473
+
474
+ TColumnConverter ArrowUInt32AsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, bool isOptional) {
475
+ return [targetType, isOptional](const std::shared_ptr<arrow::Array>& value) {
476
+ return isOptional
477
+ ? ArrowTypeAsYqlDate<true , ui32>(targetType, value)
478
+ : ArrowTypeAsYqlDate<false , ui32>(targetType, value);
479
+ };
480
+ }
481
+
482
+ TColumnConverter ArrowInt64AsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, bool isOptional) {
483
+ return [targetType, isOptional](const std::shared_ptr<arrow::Array>& value) {
484
+ return isOptional
485
+ ? ArrowTypeAsYqlDate<true , i64 >(targetType, value)
486
+ : ArrowTypeAsYqlDate<false , i64 >(targetType, value);
487
+ };
488
+ }
489
+
490
+ TColumnConverter ArrowUInt64AsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, bool isOptional) {
491
+ return [targetType, isOptional](const std::shared_ptr<arrow::Array>& value) {
492
+ return isOptional
493
+ ? ArrowTypeAsYqlDate<true , ui64>(targetType, value)
494
+ : ArrowTypeAsYqlDate<false , ui64>(targetType, value);
495
+ };
496
+ }
497
+
433
498
// Returns a converter that turns an Arrow date32 array (read as i32) into a YQL Date
// array. A MILLI date unit is rejected up front with parquet::ParquetException;
// otherwise the work is delegated to ArrowTypeAsYqlDate, selected by isOptional.
TColumnConverter ArrowDate32AsYqlDate(const std::shared_ptr<arrow::DataType>& targetType, bool isOptional, arrow::DateUnit unit) {
    const bool isMillisecondUnit = (unit == arrow::DateUnit::MILLI);
    if (isMillisecondUnit) {
        throw parquet::ParquetException(TStringBuilder() << " millisecond accuracy does not fit into the date");
    }

    return [targetType, isOptional](const std::shared_ptr<arrow::Array>& column) {
        if (isOptional) {
            return ArrowTypeAsYqlDate<true, i32>(targetType, column);
        }
        return ArrowTypeAsYqlDate<false, i32>(targetType, column);
    };
}
508
+
509
+ TColumnConverter ArrowStringAsYqlDate (const std::shared_ptr<arrow::DataType>& targetType, bool isOptional, const NDB::FormatSettings& formatSettings) {
510
+ return [targetType, isOptional, formatSettings](const std::shared_ptr<arrow::Array>& value) {
511
+ return isOptional
512
+ ? ArrowStringAsYqlDate<true >(targetType, value, formatSettings)
513
+ : ArrowStringAsYqlDate<false >(targetType, value, formatSettings);
441
514
};
442
515
}
443
516
@@ -457,6 +530,8 @@ TColumnConverter BuildCustomConverter(const std::shared_ptr<arrow::DataType>& or
457
530
switch (originalType->id ()) {
458
531
case arrow::Type::UINT16: {
459
532
switch (slotItem) {
533
+ case NUdf::EDataSlot::Date:
534
+ return ArrowUInt16AsYqlDate (targetType, isOptional);
460
535
case NUdf::EDataSlot::Datetime:
461
536
return ArrowUInt16AsYqlDatetime (targetType, isOptional);
462
537
case NUdf::EDataSlot::Timestamp:
@@ -467,6 +542,8 @@ TColumnConverter BuildCustomConverter(const std::shared_ptr<arrow::DataType>& or
467
542
}
468
543
case arrow::Type::INT32: {
469
544
switch (slotItem) {
545
+ case NUdf::EDataSlot::Date:
546
+ return ArrowInt32AsYqlDate (targetType, isOptional);
470
547
case NUdf::EDataSlot::Datetime:
471
548
return ArrowInt32AsYqlDatetime (targetType, isOptional);
472
549
case NUdf::EDataSlot::Timestamp:
@@ -477,6 +554,8 @@ TColumnConverter BuildCustomConverter(const std::shared_ptr<arrow::DataType>& or
477
554
}
478
555
case arrow::Type::UINT32: {
479
556
switch (slotItem) {
557
+ case NUdf::EDataSlot::Date:
558
+ return ArrowUInt32AsYqlDate (targetType, isOptional);
480
559
case NUdf::EDataSlot::Datetime:
481
560
return ArrowUInt32AsYqlDatetime (targetType, isOptional);
482
561
case NUdf::EDataSlot::Timestamp:
@@ -487,6 +566,8 @@ TColumnConverter BuildCustomConverter(const std::shared_ptr<arrow::DataType>& or
487
566
}
488
567
case arrow::Type::INT64: {
489
568
switch (slotItem) {
569
+ case NUdf::EDataSlot::Date:
570
+ return ArrowInt64AsYqlDate (targetType, isOptional);
490
571
case NUdf::EDataSlot::Datetime:
491
572
return ArrowInt64AsYqlDatetime (targetType, isOptional);
492
573
case NUdf::EDataSlot::Timestamp:
@@ -497,6 +578,8 @@ TColumnConverter BuildCustomConverter(const std::shared_ptr<arrow::DataType>& or
497
578
}
498
579
case arrow::Type::UINT64: {
499
580
switch (slotItem) {
581
+ case NUdf::EDataSlot::Date:
582
+ return ArrowUInt64AsYqlDate (targetType, isOptional);
500
583
case NUdf::EDataSlot::Datetime:
501
584
return ArrowUInt64AsYqlDatetime (targetType, isOptional);
502
585
case NUdf::EDataSlot::Timestamp:
@@ -553,6 +636,8 @@ TColumnConverter BuildCustomConverter(const std::shared_ptr<arrow::DataType>& or
553
636
case arrow::Type::STRING:
554
637
case arrow::Type::BINARY: {
555
638
switch (slotItem) {
639
+ case NUdf::EDataSlot::Date:
640
+ return ArrowStringAsYqlDate (targetType, isOptional, formatSettings);
556
641
case NUdf::EDataSlot::Datetime:
557
642
return ArrowStringAsYqlDateTime (targetType, isOptional, formatSettings);
558
643
case NUdf::EDataSlot::Timestamp:
0 commit comments