@@ -15,9 +15,7 @@ class TCsvToYdbConverter {
15
15
public:
16
16
// Constructs a converter from the given type parser and the optional
// null-marker string; both are stored in the corresponding members.
explicit TCsvToYdbConverter(TTypeParser& parser, const std::optional<TString>& nullValue)
    : Parser(parser)
    , NullValue(nullValue)
{
}
21
19
22
20
template <class T , std::enable_if_t <std::is_integral_v<T> && std::is_signed_v<T>, std::nullptr_t > = nullptr >
23
21
static i64 StringToArithmetic (const TString& token, size_t & cnt) {
@@ -165,7 +163,7 @@ class TCsvToYdbConverter {
165
163
}
166
164
case EPrimitiveType::Interval64:
167
165
Builder.Interval64 (GetArithmetic<i64 >(token));
168
- break ;
166
+ break ;
169
167
case EPrimitiveType::TzDate:
170
168
Builder.TzDate (token);
171
169
break ;
@@ -441,7 +439,7 @@ TStringBuf Consume(NCsvFormat::CsvSplitter& splitter,
441
439
442
440
TCsvParser::TCsvParser (TString&& headerRow, const char delimeter, const std::optional<TString>& nullValue,
443
441
const std::map<std::string, TType>* destinationTypes,
444
- const std::map<TString, TString>* paramSources)
442
+ const std::map<TString, TString>* paramSources)
445
443
: HeaderRow(std::move(headerRow))
446
444
, Delimeter(delimeter)
447
445
, NullValue(nullValue)
@@ -454,7 +452,7 @@ TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::opt
454
452
455
453
TCsvParser::TCsvParser (TVector<TString>&& header, const char delimeter, const std::optional<TString>& nullValue,
456
454
const std::map<std::string, TType>* destinationTypes,
457
- const std::map<TString, TString>* paramSources)
455
+ const std::map<TString, TString>* paramSources)
458
456
: Header(std::move(header))
459
457
, Delimeter(delimeter)
460
458
, NullValue(nullValue)
@@ -529,41 +527,91 @@ void TCsvParser::BuildValue(TString& data, TValueBuilder& builder, const TType&
529
527
builder.EndStruct ();
530
528
}
531
529
532
- TValue TCsvParser::BuildList (std::vector<TString>& lines, const TString& filename, std::optional<ui64> row) const {
530
+ TValue TCsvParser::BuildList (const std::vector<TString>& lines, const TString& filename, std::optional<ui64> row) const {
533
531
std::vector<std::unique_ptr<TTypeParser>> columnTypeParsers;
534
532
columnTypeParsers.reserve (ResultColumnCount);
535
533
for (const TType* type : ResultLineTypesSorted) {
536
534
columnTypeParsers.push_back (std::make_unique<TTypeParser>(*type));
537
535
}
538
-
539
- Ydb::Value listValue;
540
- auto * listItems = listValue.mutable_items ();
536
+
537
+ // Original path with local value object
538
+ Ydb::Value listProtoValue;
539
+ auto * listItems = listProtoValue.mutable_items ();
541
540
listItems->Reserve (lines.size ());
542
- for (auto & line : lines) {
543
- NCsvFormat::CsvSplitter splitter (line, Delimeter);
544
- TParseMetadata meta {row, filename};
545
- auto * structItems = listItems->Add ()->mutable_items ();
546
- structItems->Reserve (ResultColumnCount);
547
- auto headerIt = Header.cbegin ();
548
- auto skipIt = SkipBitMap.begin ();
549
- auto typeParserIt = columnTypeParsers.begin ();
550
- do {
551
- if (headerIt == Header.cend ()) { // SkipBitMap has same size as Header
552
- throw FormatError (yexception () << " Header contains less fields than data. Header: \" " << HeaderRow << " \" , data: \" " << line << " \" " , meta);
553
- }
554
- TStringBuf nextField = Consume (splitter, meta, *headerIt);
555
- if (!*skipIt) {
556
- *structItems->Add () = FieldToValue (*typeParserIt->get (), nextField, NullValue, meta, *headerIt).GetProto ();
557
- ++typeParserIt;
558
- }
559
- ++headerIt;
560
- ++skipIt;
561
- } while (splitter.Step ());
541
+
542
+ for (const auto & line : lines) {
543
+ ProcessCsvLine (line, listItems, columnTypeParsers, row, filename);
562
544
if (row.has_value ()) {
563
545
++row.value ();
564
546
}
565
547
}
566
- return TValue (ResultListType.value (), std::move (listValue));
548
+
549
+ // Return a TValue that takes ownership via move
550
+ return TValue (ResultListType.value (), std::move (listProtoValue));
551
+ }
552
+
553
+ TValue TCsvParser::BuildListOnArena (
554
+ const std::vector<TString>& lines,
555
+ const TString& filename,
556
+ google::protobuf::Arena* arena,
557
+ std::optional<ui64> row
558
+ ) const {
559
+ Y_ASSERT (arena != nullptr );
560
+
561
+ std::vector<std::unique_ptr<TTypeParser>> columnTypeParsers;
562
+ columnTypeParsers.reserve (ResultColumnCount);
563
+ for (const TType* type : ResultLineTypesSorted) {
564
+ columnTypeParsers.push_back (std::make_unique<TTypeParser>(*type));
565
+ }
566
+
567
+ // allocate Ydb::Value on arena
568
+ Ydb::Value* listProtoValue = google::protobuf::Arena::CreateMessage<Ydb::Value>(arena);
569
+ auto * listItems = listProtoValue->mutable_items ();
570
+ listItems->Reserve (lines.size ());
571
+
572
+ for (const auto & line : lines) {
573
+ ProcessCsvLine (line, listItems, columnTypeParsers, row, filename);
574
+ if (row.has_value ()) {
575
+ ++row.value ();
576
+ }
577
+ }
578
+
579
+ // Return a TValue that references the arena-allocated message
580
+ return TValue (ResultListType.value (), listProtoValue);
581
+ }
582
+
583
+ // Helper method to process a single CSV line
584
+ void TCsvParser::ProcessCsvLine (
585
+ const TString& line,
586
+ google::protobuf::RepeatedPtrField<Ydb::Value>* listItems,
587
+ const std::vector<std::unique_ptr<TTypeParser>>& columnTypeParsers,
588
+ std::optional<ui64> currentRow,
589
+ const TString& filename
590
+ ) const {
591
+ NCsvFormat::CsvSplitter splitter (line, Delimeter);
592
+ auto * structItems = listItems->Add ()->mutable_items ();
593
+ structItems->Reserve (ResultColumnCount);
594
+
595
+ const TParseMetadata meta {currentRow, filename};
596
+
597
+ auto headerIt = Header.cbegin ();
598
+ auto skipIt = SkipBitMap.begin ();
599
+ auto typeParserIt = columnTypeParsers.begin ();
600
+
601
+ do {
602
+ if (headerIt == Header.cend ()) { // SkipBitMap has same size as Header
603
+ throw FormatError (yexception () << " Header contains less fields than data. Header: \" " << HeaderRow << " \" , data: \" " << line << " \" " , meta);
604
+ }
605
+ TStringBuf nextField = Consume (splitter, meta, *headerIt);
606
+ if (!*skipIt) {
607
+ TValue builtValue = FieldToValue (*typeParserIt->get (), nextField, NullValue, meta, *headerIt);
608
+ *structItems->Add () = std::move (builtValue.GetProto ());
609
+
610
+ ++typeParserIt;
611
+ }
612
+ ++headerIt;
613
+ ++skipIt;
614
+ } while (splitter.Step ());
567
615
}
568
616
569
617
void TCsvParser::BuildLineType () {
@@ -607,5 +655,10 @@ const TVector<TString>& TCsvParser::GetHeader() {
607
655
return Header;
608
656
}
609
657
658
+ const TString& TCsvParser::GetHeaderRow () const {
659
+ return HeaderRow;
660
+ }
661
+
610
662
}
611
663
}
664
+
0 commit comments