1
1
#include " csv_parser.h"
2
2
3
+ #include < ydb/public/api/protos/ydb_value.pb.h>
3
4
#include < ydb/public/lib/ydb_cli/common/common.h>
4
5
5
6
#include < library/cpp/string_utils/csv/csv.h>
@@ -177,7 +178,7 @@ class TCsvToYdbConverter {
177
178
}
178
179
}
179
180
180
- void BuildValue (TStringBuf token) {
181
+ void BuildValue (const TStringBuf& token) {
181
182
switch (Parser.GetKind ()) {
182
183
case TTypeParser::ETypeKind::Primitive: {
183
184
BuildPrimitive (TString (token));
@@ -279,7 +280,7 @@ class TCsvToYdbConverter {
279
280
throw TCsvParseException () << " Expected bool value: \" true\" or \" false\" , received: \" " << token << " \" ." ;
280
281
}
281
282
282
- void EnsureNull (TStringBuf token) const {
283
+ void EnsureNull (const TStringBuf& token) const {
283
284
if (!NullValue) {
284
285
throw TCsvParseException () << " Expected null value instead of \" " << token << " \" , but null value is not set." ;
285
286
}
@@ -288,7 +289,7 @@ class TCsvToYdbConverter {
288
289
}
289
290
}
290
291
291
- TValue Convert (TStringBuf token) {
292
+ TValue Convert (const TStringBuf& token) {
292
293
BuildValue (token);
293
294
return Builder.Build ();
294
295
}
@@ -317,10 +318,10 @@ TCsvParseException FormatError(const std::exception& inputError,
317
318
}
318
319
319
320
TValue FieldToValue (TTypeParser& parser,
320
- TStringBuf token,
321
+ const TStringBuf& token,
321
322
const std::optional<TString>& nullValue,
322
323
const TCsvParser::TParseMetadata& meta,
323
- TString columnName) {
324
+ const TString& columnName) {
324
325
try {
325
326
TCsvToYdbConverter converter (parser, nullValue);
326
327
return converter.Convert (token);
@@ -331,7 +332,7 @@ TValue FieldToValue(TTypeParser& parser,
331
332
332
333
TStringBuf Consume (NCsvFormat::CsvSplitter& splitter,
333
334
const TCsvParser::TParseMetadata& meta,
334
- TString columnName) {
335
+ const TString& columnName) {
335
336
try {
336
337
return splitter.Consume ();
337
338
} catch (std::exception& e) {
@@ -342,30 +343,30 @@ TStringBuf Consume(NCsvFormat::CsvSplitter& splitter,
342
343
}
343
344
344
345
// Constructs a parser from a raw header line.
//   headerRow        - the unsplit header line; moved into HeaderRow
//   delimeter        - field separator used for the header and for data rows
//   nullValue        - optional textual marker stored in NullValue
//   destinationTypes - pointer (not owned) to the name -> type map stored in DestinationTypes
//   paramSources     - pointer (not owned) stored in ParamSources
// The header line is split with the same delimiter and the resulting column
// names are stored in Header.
TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::optional<TString>& nullValue,
                       const std::map<TString, TType>* destinationTypes,
                       const std::map<TString, TString>* paramSources)
    : HeaderRow(std::move(headerRow))
    , Delimeter(delimeter)
    , NullValue(nullValue)
    , DestinationTypes(destinationTypes)
    , ParamSources(paramSources)
{
    // Split the stored member, not the (already moved-from) constructor argument.
    NCsvFormat::CsvSplitter headerSplitter(HeaderRow, Delimeter);
    Header = static_cast<TVector<TString>>(headerSplitter);
}
356
357
357
358
// Constructs a parser from an already split header.
//   header           - column names; moved into Header
//   delimeter        - field separator stored in Delimeter
//   nullValue        - optional textual marker stored in NullValue
//   destinationTypes - pointer (not owned) to the name -> type map stored in DestinationTypes
//   paramSources     - pointer (not owned) stored in ParamSources
// HeaderRow is not set by this overload.
TCsvParser::TCsvParser(TVector<TString>&& header, const char delimeter, const std::optional<TString>& nullValue,
                       const std::map<TString, TType>* destinationTypes,
                       const std::map<TString, TString>* paramSources)
    : Header(std::move(header))
    , Delimeter(delimeter)
    , NullValue(nullValue)
    , DestinationTypes(destinationTypes)
    , ParamSources(paramSources)
{
}
367
368
368
- void TCsvParser::GetParams (TString& & data, TParamsBuilder& builder, const TParseMetadata& meta) const {
369
+ void TCsvParser::BuildParams (TString& data, TParamsBuilder& builder, const TParseMetadata& meta) const {
369
370
NCsvFormat::CsvSplitter splitter (data, Delimeter);
370
371
auto headerIt = Header.begin ();
371
372
do {
@@ -374,8 +375,8 @@ void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder, const TParse
374
375
}
375
376
TStringBuf token = Consume (splitter, meta, *headerIt);
376
377
TString fullname = " $" + *headerIt;
377
- auto paramIt = ParamTypes ->find (fullname);
378
- if (paramIt == ParamTypes ->end ()) {
378
+ auto paramIt = DestinationTypes ->find (fullname);
379
+ if (paramIt == DestinationTypes ->end ()) {
379
380
++headerIt;
380
381
continue ;
381
382
}
@@ -395,7 +396,7 @@ void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder, const TParse
395
396
}
396
397
}
397
398
398
- void TCsvParser::GetValue (TString& & data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const {
399
+ void TCsvParser::BuildValue (TString& data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const {
399
400
NCsvFormat::CsvSplitter splitter (data, Delimeter);
400
401
auto headerIt = Header.cbegin ();
401
402
std::map<TString, TStringBuf> fields;
@@ -431,18 +432,68 @@ void TCsvParser::GetValue(TString&& data, TValueBuilder& builder, const TType& t
431
432
builder.EndStruct ();
432
433
}
433
434
434
- TType TCsvParser::GetColumnsType () const {
435
+ TValue TCsvParser::BuildList (std::vector<TString>& lines, const TString& filename, std::optional<ui64> row) const {
436
+ std::vector<std::unique_ptr<TTypeParser>> columnTypeParsers;
437
+ columnTypeParsers.reserve (ResultColumnCount);
438
+ for (const TType* type : ResultLineTypesSorted) {
439
+ columnTypeParsers.push_back (std::make_unique<TTypeParser>(*type));
440
+ }
441
+ Ydb::Value listValue;
442
+ auto * listItems = listValue.mutable_items ();
443
+ listItems->Reserve (lines.size ());
444
+ for (auto & line : lines) {
445
+ std::vector<TStringBuf> fields;
446
+ NCsvFormat::CsvSplitter splitter (line, Delimeter);
447
+ TParseMetadata meta {row, filename};
448
+ auto headerIt = Header.cbegin ();
449
+ auto skipIt = SkipBitMap.begin ();
450
+ do {
451
+ if (headerIt == Header.cend ()) { // SkipBitMap has same size as Header
452
+ throw FormatError (yexception () << " Header contains less fields than data. Header: \" " << HeaderRow << " \" , data: \" " << line << " \" " , meta);
453
+ }
454
+ TStringBuf nextField = Consume (splitter, meta, *headerIt);
455
+ if (!*skipIt) {
456
+ fields.emplace_back (nextField);
457
+ }
458
+ ++headerIt;
459
+ ++skipIt;
460
+ } while (splitter.Step ());
461
+ auto * structItems = listItems->Add ()->mutable_items ();
462
+ structItems->Reserve (ResultColumnCount);
463
+ auto typeParserIt = columnTypeParsers.begin ();
464
+ auto fieldIt = fields.begin ();
465
+ auto nameIt = ResultLineNamesSorted.begin ();
466
+ // fields size equals columnTypeParsers size, no need for second end check
467
+ for (; typeParserIt != columnTypeParsers.end (); ++typeParserIt, ++fieldIt, ++nameIt) {
468
+ *structItems->Add () = FieldToValue (*typeParserIt->get (), *fieldIt, NullValue, meta, **nameIt).GetProto ();
469
+ }
470
+ if (row.has_value ()) {
471
+ ++row.value ();
472
+ }
473
+ }
474
+ return TValue (ResultListType.value (), std::move (listValue));
475
+ }
476
+
477
+ void TCsvParser::BuildLineType () {
478
+ SkipBitMap.reserve (Header.size ());
479
+ ResultColumnCount = 0 ;
435
480
TTypeBuilder builder;
436
481
builder.BeginStruct ();
437
482
for (const auto & colName : Header) {
438
- if (ParamTypes->find (colName) != ParamTypes->end ()) {
439
- builder.AddMember (colName, ParamTypes->at (colName));
483
+ auto findIt = DestinationTypes->find (colName);
484
+ if (findIt != DestinationTypes->end ()) {
485
+ builder.AddMember (colName, findIt->second );
486
+ ResultLineTypesSorted.emplace_back (&findIt->second );
487
+ ResultLineNamesSorted.emplace_back (&colName);
488
+ SkipBitMap.push_back (false );
489
+ ++ResultColumnCount;
440
490
} else {
441
- builder. AddMember ( " __ydb_skip_column_name " , TTypeBuilder (). Build () );
491
+ SkipBitMap. push_back ( true );
442
492
}
443
493
}
444
494
builder.EndStruct ();
445
- return builder.Build ();
495
+ ResultLineType = builder.Build ();
496
+ ResultListType = TTypeBuilder ().List (ResultLineType.value ()).Build ();
446
497
}
447
498
448
499
}
0 commit comments