Skip to content

Commit f8f5387

Browse files
Gazizonoki and pnv1 authored
Fixed help for --null-value in import file command and refactored csv parser error handling (#3936)
Co-authored-by: Nikolay Perfilov <pnv1@yandex-team.ru>
1 parent e841ca6 commit f8f5387

File tree

8 files changed

+121
-60
lines changed

8 files changed

+121
-60
lines changed

ydb/public/lib/ydb_cli/commands/ydb_service_import.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,8 @@ void TCommandImportFromCsv::Config(TConfig& config) {
269269
config.Opts->AddLongOption("delimiter", "Field delimiter in rows")
270270
.RequiredArgument("STRING").StoreResult(&Delimiter).DefaultValue(Delimiter);
271271
}
272-
config.Opts->AddLongOption("null-value", "Value that would be interpreted as NULL")
273-
.RequiredArgument("STRING").StoreResult(&NullValue).DefaultValue(NullValue);
272+
config.Opts->AddLongOption("null-value", "Value that would be interpreted as NULL, no NULL value by default")
273+
.RequiredArgument("STRING").StoreResult(&NullValue);
274274
// TODO: quoting/quote_char
275275
}
276276

@@ -286,9 +286,7 @@ int TCommandImportFromCsv::Run(TConfig& config) {
286286
settings.Header(Header);
287287
settings.NewlineDelimited(NewlineDelimited);
288288
settings.HeaderRow(HeaderRow);
289-
if (config.ParseResult->Has("null-value")) {
290-
settings.NullValue(NullValue);
291-
}
289+
settings.NullValue(NullValue);
292290

293291
if (Delimiter.size() != 1) {
294292
throw TMisuseException()

ydb/public/lib/ydb_cli/commands/ydb_service_import.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class TCommandImportFromCsv : public TCommandImportFileBase {
7878
protected:
7979
TString HeaderRow;
8080
TString Delimiter;
81-
TString NullValue;
81+
std::optional<TString> NullValue;
8282
ui32 SkipRows = 0;
8383
bool Header = false;
8484
bool NewlineDelimited = true;

ydb/public/lib/ydb_cli/common/csv_parser.cpp

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class TCsvToYdbConverter {
4646
}
4747
return static_cast<T>(value);
4848
} catch (std::exception& e) {
49-
throw TMisuseException() << "Expected " << Parser.GetPrimitive() << " value, recieved: \"" << token << "\".";
49+
throw TCsvParseException() << "Expected " << Parser.GetPrimitive() << " value, recieved: \"" << token << "\".";
5050
}
5151
}
5252

@@ -173,7 +173,7 @@ class TCsvToYdbConverter {
173173
Builder.TzTimestamp(token);
174174
break;
175175
default:
176-
throw TMisuseException() << "Unsupported primitive type: " << Parser.GetPrimitive();
176+
throw TCsvParseException() << "Unsupported primitive type: " << Parser.GetPrimitive();
177177
}
178178
}
179179

@@ -224,7 +224,7 @@ class TCsvToYdbConverter {
224224
break;
225225
}
226226
default:
227-
throw TMisuseException() << "Unsupported type kind: " << Parser.GetKind();
227+
throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind();
228228
}
229229
}
230230

@@ -259,7 +259,7 @@ class TCsvToYdbConverter {
259259
break;
260260

261261
default:
262-
throw TMisuseException() << "Unsupported type kind: " << Parser.GetKind();
262+
throw TCsvParseException() << "Unsupported type kind: " << Parser.GetKind();
263263
}
264264
}
265265

@@ -276,15 +276,15 @@ class TCsvToYdbConverter {
276276
if (token == "false") {
277277
return false;
278278
}
279-
throw TMisuseException() << "Expected bool value: \"true\" or \"false\", recieved: \"" << token << "\".";
279+
throw TCsvParseException() << "Expected bool value: \"true\" or \"false\", recieved: \"" << token << "\".";
280280
}
281281

282282
void EnsureNull(TStringBuf token) const {
283283
if (!NullValue) {
284-
throw TMisuseException() << "Expected null value instead of \"" << token << "\", but null value is not set.";
284+
throw TCsvParseException() << "Expected null value instead of \"" << token << "\", but null value is not set.";
285285
}
286286
if (token != NullValue) {
287-
throw TMisuseException() << "Expected null value: \"" << NullValue << "\", recieved: \"" << token << "\".";
287+
throw TCsvParseException() << "Expected null value: \"" << NullValue << "\", recieved: \"" << token << "\".";
288288
}
289289
}
290290

@@ -299,6 +299,46 @@ class TCsvToYdbConverter {
299299
TValueBuilder Builder;
300300
};
301301

302+
TCsvParseException FormatError(const std::exception& inputError,
303+
const TCsvParser::TParseMetadata& meta,
304+
std::optional<TString> columnName = {}) {
305+
auto outputError = TCsvParseException() << "Error during CSV parsing";
306+
if (meta.Line.has_value()) {
307+
outputError << " in line " << meta.Line.value();
308+
}
309+
if (columnName.has_value()) {
310+
outputError << " in column `" << columnName.value() << "`";
311+
}
312+
if (meta.Filename.has_value()) {
313+
outputError << " in file `" << meta.Filename.value() << "`";
314+
}
315+
outputError << ":\n" << inputError.what();
316+
return outputError;
317+
}
318+
319+
TValue FieldToValue(TTypeParser& parser,
320+
TStringBuf token,
321+
const std::optional<TString>& nullValue,
322+
const TCsvParser::TParseMetadata& meta,
323+
TString columnName) {
324+
try {
325+
TCsvToYdbConverter converter(parser, nullValue);
326+
return converter.Convert(token);
327+
} catch (std::exception& e) {
328+
throw FormatError(e, meta, columnName);
329+
}
330+
}
331+
332+
TStringBuf Consume(NCsvFormat::CsvSplitter& splitter,
333+
const TCsvParser::TParseMetadata& meta,
334+
TString columnName) {
335+
try {
336+
return splitter.Consume();
337+
} catch (std::exception& e) {
338+
throw FormatError(e, meta, columnName);
339+
}
340+
}
341+
302342
}
303343

304344
TCsvParser::TCsvParser(TString&& headerRow, const char delimeter, const std::optional<TString>& nullValue,
@@ -325,19 +365,14 @@ TCsvParser::TCsvParser(TVector<TString>&& header, const char delimeter, const st
325365
{
326366
}
327367

328-
TValue TCsvParser::FieldToValue(TTypeParser& parser, TStringBuf token) const {
329-
TCsvToYdbConverter converter(parser, NullValue);
330-
return converter.Convert(token);
331-
}
332-
333-
void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder) const {
368+
void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder, const TParseMetadata& meta) const {
334369
NCsvFormat::CsvSplitter splitter(data, Delimeter);
335370
auto headerIt = Header.begin();
336371
do {
337-
TStringBuf token = splitter.Consume();
338372
if (headerIt == Header.end()) {
339-
throw TMisuseException() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"";
373+
throw FormatError(yexception() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"", meta);
340374
}
375+
TStringBuf token = Consume(splitter, meta, *headerIt);
341376
TString fullname = "$" + *headerIt;
342377
auto paramIt = ParamTypes->find(fullname);
343378
if (paramIt == ParamTypes->end()) {
@@ -347,35 +382,36 @@ void TCsvParser::GetParams(TString&& data, TParamsBuilder& builder) const {
347382
if (ParamSources) {
348383
auto paramSource = ParamSources->find(fullname);
349384
if (paramSource != ParamSources->end()) {
350-
throw TMisuseException() << "Parameter " << fullname << " value found in more than one source: stdin, " << paramSource->second << ".";
385+
throw FormatError(yexception() << "Parameter " << fullname << " value found in more than one source: stdin, " << paramSource->second << ".", meta);
351386
}
352387
}
353388
TTypeParser parser(paramIt->second);
354-
builder.AddParam(fullname, FieldToValue(parser, token));
389+
builder.AddParam(fullname, FieldToValue(parser, token, NullValue, meta, *headerIt));
355390
++headerIt;
356391
} while (splitter.Step());
357392

358393
if (headerIt != Header.end()) {
359-
throw TMisuseException() << "Header contains more fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"";
394+
throw FormatError(yexception() << "Header contains more fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"", meta);
360395
}
361396
}
362397

363-
void TCsvParser::GetValue(TString&& data, TValueBuilder& builder, const TType& type) const {
398+
void TCsvParser::GetValue(TString&& data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const {
364399
NCsvFormat::CsvSplitter splitter(data, Delimeter);
365400
auto headerIt = Header.cbegin();
366401
std::map<TString, TStringBuf> fields;
367402
do {
368-
TStringBuf token = splitter.Consume();
369403
if (headerIt == Header.cend()) {
370-
throw TMisuseException() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"";
404+
throw FormatError(yexception() << "Header contains less fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"", meta);
371405
}
406+
TStringBuf token = Consume(splitter, meta, *headerIt);
372407
fields[*headerIt] = token;
373408
++headerIt;
374409
} while (splitter.Step());
375410

376411
if (headerIt != Header.cend()) {
377-
throw TMisuseException() << "Header contains more fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"";
412+
throw FormatError(yexception() << "Header contains more fields than data. Header: \"" << HeaderRow << "\", data: \"" << data << "\"", meta);
378413
}
414+
379415
builder.BeginStruct();
380416
TTypeParser parser(type);
381417
parser.OpenStruct();
@@ -386,10 +422,11 @@ void TCsvParser::GetValue(TString&& data, TValueBuilder& builder, const TType& t
386422
}
387423
auto fieldIt = fields.find(name);
388424
if (fieldIt == fields.end()) {
389-
throw TMisuseException() << "No member \"" << name << "\" in csv string for YDB struct type";
425+
throw FormatError(yexception() << "No member \"" << name << "\" in csv string for YDB struct type", meta);
390426
}
391-
builder.AddMember(name, FieldToValue(parser, fieldIt->second));
427+
builder.AddMember(name, FieldToValue(parser, fieldIt->second, NullValue, meta, name));
392428
}
429+
393430
parser.CloseStruct();
394431
builder.EndStruct();
395432
}

ydb/public/lib/ydb_cli/common/csv_parser.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,15 @@
77
namespace NYdb {
88
namespace NConsoleClient {
99

10+
class TCsvParseException : public yexception {};
11+
1012
class TCsvParser {
1113
public:
14+
struct TParseMetadata {
15+
std::optional<uint64_t> Line;
16+
std::optional<TString> Filename;
17+
};
18+
1219
TCsvParser() = default;
1320

1421
TCsvParser(const TCsvParser&) = delete;
@@ -24,13 +31,11 @@ class TCsvParser {
2431
const std::map<TString, TType>* paramTypes = nullptr,
2532
const std::map<TString, TString>* paramSources = nullptr);
2633

27-
void GetParams(TString&& data, TParamsBuilder& builder) const;
28-
void GetValue(TString&& data, TValueBuilder& builder, const TType& type) const;
34+
void GetParams(TString&& data, TParamsBuilder& builder, const TParseMetadata& meta) const;
35+
void GetValue(TString&& data, TValueBuilder& builder, const TType& type, const TParseMetadata& meta) const;
2936
TType GetColumnsType() const;
3037

3138
private:
32-
TValue FieldToValue(TTypeParser& parser, TStringBuf token) const;
33-
3439
TVector<TString> Header;
3540
TString HeaderRow;
3641
char Delimeter;

ydb/public/lib/ydb_cli/common/csv_parser_ut.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) {
2323
std::map<TString, TString> paramSources;
2424
TCsvParser parser(std::move(header), ',', "", &paramTypes, &paramSources);
2525
TParamsBuilder paramBuilder;
26-
parser.GetParams(std::move(data), paramBuilder);
26+
parser.GetParams(std::move(data), paramBuilder, TCsvParser::TParseMetadata{});
2727
auto values = paramBuilder.Build().GetValues();
2828
UNIT_ASSERT_EQUAL(values.size(), result.size());
2929
for (const auto& [name, value] : result) {
@@ -41,7 +41,7 @@ Y_UNIT_TEST_SUITE(YdbCliCsvParserTests) {
4141

4242
TCsvParser parser(std::move(header), ',', "", &paramTypes, nullptr);
4343
TValueBuilder valueBuilder;
44-
parser.GetValue(std::move(data), valueBuilder, result.GetType());
44+
parser.GetValue(std::move(data), valueBuilder, result.GetType(), TCsvParser::TParseMetadata{});
4545
UNIT_ASSERT(CompareValues(valueBuilder.Build(), result));
4646
}
4747

ydb/public/lib/ydb_cli/common/parameters.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ bool TCommandWithParameters::GetNextParams(THolder<TParamsBuilder>& paramBuilder
259259
}
260260
case EOutputFormat::Csv:
261261
case EOutputFormat::Tsv: {
262-
CsvParser.GetParams(std::move(*data), *paramBuilder);
262+
CsvParser.GetParams(std::move(*data), *paramBuilder, TCsvParser::TParseMetadata{});
263263
break;
264264
}
265265
default:
@@ -302,7 +302,7 @@ bool TCommandWithParameters::GetNextParams(THolder<TParamsBuilder>& paramBuilder
302302
case EOutputFormat::Csv:
303303
case EOutputFormat::Tsv: {
304304
TValueBuilder valueBuilder;
305-
CsvParser.GetValue(std::move(*data), valueBuilder, type);
305+
CsvParser.GetValue(std::move(*data), valueBuilder, type, TCsvParser::TParseMetadata{});
306306
paramBuilder->AddParam(fullname, valueBuilder.Build());
307307
break;
308308
}
@@ -381,7 +381,7 @@ bool TCommandWithParameters::GetNextParams(THolder<TParamsBuilder>& paramBuilder
381381
case EOutputFormat::Csv:
382382
case EOutputFormat::Tsv: {
383383
valueBuilder.AddListItem();
384-
CsvParser.GetValue(std::move(*data), valueBuilder, type.GetProto().list_type().item());
384+
CsvParser.GetValue(std::move(*data), valueBuilder, type.GetProto().list_type().item(), TCsvParser::TParseMetadata{});
385385
break;
386386
}
387387
default:

0 commit comments

Comments
 (0)