Skip to content

Commit c52a789

Browse files
authored
Always use Utf8 aware TextWalker for antlr4 (#9245)
1 parent adfd84c commit c52a789

File tree

13 files changed

+51
-24
lines changed

13 files changed

+51
-24
lines changed

ydb/library/yql/parser/lexer_common/hints.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ namespace {
3131

3232
class TTokenProcessor {
3333
public:
34-
TTokenProcessor(const TString& queryFile, TSQLHints& hints)
34+
TTokenProcessor(const TString& queryFile, TSQLHints& hints, bool utf8Aware)
3535
: QueryFile(queryFile)
3636
, Hints(hints)
37+
, Utf8Aware(utf8Aware)
3738
{}
3839

3940
TPosition ExtractPosition(const TParsedToken& token) const {
@@ -57,7 +58,7 @@ class TTokenProcessor {
5758
// skip leading comments
5859
return;
5960
}
60-
TVector<TSQLHint> currentHints = NDetail::ParseSqlHints(pos, token.Content);
61+
TVector<TSQLHint> currentHints = NDetail::ParseSqlHints(pos, token.Content, Utf8Aware);
6162
if (currentHints.empty()) {
6263
// no hints here
6364
return;
@@ -70,12 +71,14 @@ class TTokenProcessor {
7071
TMaybe<TPosition> PrevNonCommentPos;
7172
const TString QueryFile;
7273
TSQLHints& Hints;
74+
const bool Utf8Aware;
7375
};
7476

7577
}
7678

77-
bool CollectSqlHints(ILexer& lexer, const TString& query, const TString& queryName, const TString& queryFile, TSQLHints& hints, NYql::TIssues& issues, size_t maxErrors) {
78-
TTokenProcessor tp(queryFile, hints);
79+
bool CollectSqlHints(ILexer& lexer, const TString& query, const TString& queryName,
80+
const TString& queryFile, TSQLHints& hints, NYql::TIssues& issues, size_t maxErrors, bool utf8Aware) {
81+
TTokenProcessor tp(queryFile, hints, utf8Aware);
7982
return lexer.Tokenize(query, queryName, [&tp](TParsedToken&& token) { tp.ProcessToken(std::move(token)); }, issues, maxErrors);
8083
}
8184

ydb/library/yql/parser/lexer_common/hints.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ using TSQLHints = TMap<NYql::TPosition, TVector<TSQLHint>>;
2929
// For example: SELECT /*+ Name(Value) */ -- Name2(Value2)
3030
// in this case TSQLHints will consist of a single entry with the position of the SELECT token
3131

32-
bool CollectSqlHints(ILexer& lexer, const TString& query, const TString& queryName, const TString& queryFile, TSQLHints& hints, NYql::TIssues& issues, size_t maxErrors);
32+
bool CollectSqlHints(ILexer& lexer, const TString& query, const TString& queryName,
33+
const TString& queryFile, TSQLHints& hints, NYql::TIssues& issues, size_t maxErrors, bool utf8Aware);
3334

3435
}
3536

ydb/library/yql/parser/lexer_common/parse_hints_impl.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ enum EParseState {
1515
WANT_WS,
1616
};
1717

18-
TVector<TSQLHint> ParseSqlHints(NYql::TPosition commentPos, const TStringBuf& comment) {
18+
TVector<TSQLHint> ParseSqlHints(NYql::TPosition commentPos, const TStringBuf& comment, bool utf8Aware) {
1919
TVector<TSQLHint> result;
2020
if (!comment.StartsWith("/*+") && !comment.StartsWith("--+")) {
2121
return result;
2222
}
2323
TSQLHint hint;
24-
NYql::TTextWalker commentWalker(commentPos);
24+
NYql::TTextWalker commentWalker(commentPos, utf8Aware);
2525
const size_t len = comment.size();
2626
EParseState state = EParseState::INITIAL;
2727
for (size_t i = 0; i < len; ++i) {

ydb/library/yql/parser/lexer_common/parse_hints_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ namespace NSQLTranslation {
66

77
namespace NDetail {
88

9-
TVector<TSQLHint> ParseSqlHints(NYql::TPosition commentPos, const TStringBuf& comment);
9+
TVector<TSQLHint> ParseSqlHints(NYql::TPosition commentPos, const TStringBuf& comment, bool utf8Aware);
1010

1111
}
1212

ydb/library/yql/parser/lexer_common/ut/hints_ut.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ TSQLHints CollectHints(const TString& query, bool antlr4Parser) {
1515
TSQLHints result;
1616
NYql::TIssues issues;
1717
size_t maxErrors = 100;
18-
UNIT_ASSERT(CollectSqlHints(*lexer, query, "", "", result, issues, maxErrors));
18+
UNIT_ASSERT(CollectSqlHints(*lexer, query, "", "", result, issues, maxErrors, false));
1919
UNIT_ASSERT(issues.Empty());
2020
return result;
2121
}

ydb/library/yql/parser/lexer_common/ut/parse_hints_ut.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ using namespace NSQLTranslation;
88
using namespace NSQLTranslation::NDetail;
99

1010
void CheckParse(TStringBuf comment, TStringBuf expected) {
11-
TString parsed = JoinSeq(",", ParseSqlHints({}, comment));
11+
TString parsed = JoinSeq(",", ParseSqlHints({}, comment, false));
1212
UNIT_ASSERT_NO_DIFF(parsed, expected);
1313
}
1414

ydb/library/yql/public/issue/yql_issue.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ struct TPosition {
5555

5656
class TTextWalker {
5757
public:
58-
TTextWalker(TPosition& position, bool utf8Aware = false)
58+
TTextWalker(TPosition& position, bool utf8Aware)
5959
: Position(position)
6060
, Utf8Aware(utf8Aware)
6161
, HaveCr(false)

ydb/library/yql/public/issue/yql_issue_ut.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ Y_UNIT_TEST_SUITE(TextWalkerTest) {
8989
TPosition pos;
9090
pos.Row = 1;
9191

92-
TTextWalker walker(pos);
92+
TTextWalker walker(pos, false);
9393
walker.Advance(TStringBuf("a\r\taa"));
9494

9595
UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(5, 1));
@@ -101,7 +101,7 @@ Y_UNIT_TEST_SUITE(TextWalkerTest) {
101101
TPosition pos;
102102
pos.Row = 1;
103103

104-
TTextWalker walker(pos);
104+
TTextWalker walker(pos, false);
105105
walker.Advance(TStringBuf("a\raa\r"));
106106
UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(4, 1));
107107
walker.Advance('\n');
@@ -115,6 +115,28 @@ Y_UNIT_TEST_SUITE(TextWalkerTest) {
115115
walker.Advance('a');
116116
UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(1, 3));
117117
}
118+
119+
Y_UNIT_TEST(UnicodeTest) {
120+
{
121+
TPosition pos;
122+
pos.Row = 1;
123+
124+
TTextWalker walker(pos, false);
125+
walker.Advance(TStringBuf("привет"));
126+
127+
UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(12, 1));
128+
}
129+
130+
{
131+
TPosition pos;
132+
pos.Row = 1;
133+
134+
TTextWalker walker(pos, true);
135+
walker.Advance(TStringBuf("привет"));
136+
137+
UNIT_ASSERT_VALUES_EQUAL(pos, TPosition(6, 1));
138+
}
139+
}
118140
}
119141

120142
Y_UNIT_TEST_SUITE(ToOneLineStringTest) {

ydb/library/yql/sql/pg/pg_sql.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5274,8 +5274,8 @@ class TConverter : public IPGParseEvents {
52745274
void ScanRows(const TString& query) {
52755275
QuerySize = query.Size();
52765276
RowStarts.push_back(0);
5277-
TPosition position(1, 1);
5278-
TTextWalker walker(position);
5277+
TPosition position(0, 1);
5278+
TTextWalker walker(position, true);
52795279
auto prevRow = position.Row;
52805280
for (ui32 i = 0; i < query.Size(); ++i) {
52815281
walker.Advance(query[i]);

ydb/library/yql/sql/v0/node.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,7 +1771,7 @@ bool TryStringContent(const TString& str, TString& result, ui32& flags, TString&
17711771

17721772
auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes);
17731773
if (unescapeResult != EUnescapeResult::OK) {
1774-
TTextWalker walker(pos);
1774+
TTextWalker walker(pos, false);
17751775
walker.Advance(atom.Trunc(readBytes));
17761776
error = UnescapeResultToString(unescapeResult);
17771777
return false;
@@ -1839,7 +1839,7 @@ TString IdContent(TContext& ctx, const TString& s) {
18391839

18401840
auto unescapeResult = UnescapeArbitraryAtom(atom, endSym, &sout, &readBytes);
18411841
if (unescapeResult != EUnescapeResult::OK) {
1842-
TTextWalker walker(pos);
1842+
TTextWalker walker(pos, false);
18431843
walker.Advance(atom.Trunc(readBytes));
18441844
ctx.Error(pos) << "Cannot parse broken identifier: " << UnescapeResultToString(unescapeResult);
18451845
return {};

0 commit comments

Comments
 (0)