Skip to content

Commit 5af5cb3

Browse files
vityaman authored and robot-piglet committed
YQL-19747 Improve yql_complete tool and add input validation
No description --- Pull Request resolved: ytsaurus/ytsaurus#1185 commit_hash:1def5874ff6a9a5b3dcdd0ad285d2e64b16c9306
1 parent 6a114f0 commit 5af5cb3

File tree

8 files changed: +62 -19 lines changed

yql/essentials/parser/pg_wrapper/parser.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@
22

33
#include "arena_ctx.h"
44

5+
#include <util/charset/utf8.h>
56
#include <util/generic/scope.h>
7+
68
#include <fcntl.h>
79
#include <stdint.h>
810

@@ -219,7 +221,7 @@ void PGParse(const TString& input, IPGParseEvents& events) {
219221
break;
220222
}
221223

222-
if (!TTextWalker::IsUtf8Intermediate(input[i])) {
224+
if (!IsUTF8ContinuationByte(input[i])) {
223225
++codepoints;
224226
}
225227
walker.Advance(input[i]);

yql/essentials/public/fastcheck/format.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
#include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
66
#include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
77
#include <yql/essentials/core/issue/yql_issue.h>
8+
#include <util/charset/utf8.h>
89
#include <util/string/builder.h>
910

1011
namespace NYql {
@@ -88,20 +89,20 @@ class TFormatRunner : public ICheckRunner {
8889
continue;
8990
}
9091

91-
while (i > 0 && TTextWalker::IsUtf8Intermediate(request.Program[i])) {
92+
while (i > 0 && IsUTF8ContinuationByte(request.Program[i])) {
9293
--i;
9394
}
9495

9596
break;
9697
}
9798

9899
TString formattedSample = formattedQuery.substr(i, FormatContextLimit);
99-
while (!formattedSample.empty() && TTextWalker::IsUtf8Intermediate(formattedQuery.back())) {
100+
while (!formattedSample.empty() && IsUTF8ContinuationByte(formattedQuery.back())) {
100101
formattedSample.erase(formattedSample.size() - 1);
101102
}
102103

103104
TString origSample = request.Program.substr(i, FormatContextLimit);
104-
while (!origSample.empty() && TTextWalker::IsUtf8Intermediate(origSample.back())) {
105+
while (!origSample.empty() && IsUTF8ContinuationByte(origSample.back())) {
105106
origSample.erase(origSample.size() - 1);
106107
}
107108

yql/essentials/public/issue/yql_issue.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ TTextWalker& TTextWalker::Advance(char c) {
5454
}
5555

5656
ui32 charDistance = 1;
57-
if (Utf8Aware && IsUtf8Intermediate(c)) {
57+
if (Utf8Aware && IsUTF8ContinuationByte(c)) {
5858
charDistance = 0;
5959
}
6060

yql/essentials/public/issue/yql_issue.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -63,10 +63,6 @@ class TTextWalker {
6363
{
6464
}
6565

66-
static inline bool IsUtf8Intermediate(char c) {
67-
return (c & 0xC0) == 0x80;
68-
}
69-
7066
template<typename T>
7167
TTextWalker& Advance(const T& buf) {
7268
for (char c : buf) {

yql/essentials/sql/v1/complete/sql_complete.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,15 @@ namespace NSQLComplete {
2626
}
2727

2828
TCompletion Complete(TCompletionInput input) {
29+
if (
30+
input.CursorPosition < input.Text.length() &&
31+
IsUTF8ContinuationByte(input.Text.at(input.CursorPosition)) ||
32+
input.Text.length() < input.CursorPosition) {
33+
ythrow yexception()
34+
<< "invalid cursor position " << input.CursorPosition
35+
<< " for input size " << input.Text.size();
36+
}
37+
2938
auto prefix = input.Text.Head(input.CursorPosition);
3039
auto completedToken = GetCompletedToken(prefix);
3140

yql/essentials/sql/v1/complete/sql_complete_ut.cpp

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,8 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
6363
return MakeSqlCompletionEngine(std::move(lexer), std::move(service));
6464
}
6565

66-
TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TStringBuf prefix) {
67-
return engine->Complete({prefix}).Candidates;
66+
TVector<TCandidate> Complete(ISqlCompletionEngine::TPtr& engine, TCompletionInput input) {
67+
return engine->Complete(input).Candidates;
6868
}
6969

7070
Y_UNIT_TEST(Beginning) {
@@ -438,17 +438,31 @@ Y_UNIT_TEST_SUITE(SqlCompleteTests) {
438438
};
439439

440440
auto engine = MakeSqlCompletionEngineUT();
441-
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "se"), expected);
442-
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "sE"), expected);
443-
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "Se"), expected);
444-
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, "SE"), expected);
441+
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"se"}), expected);
442+
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"sE"}), expected);
443+
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"Se"}), expected);
444+
UNIT_ASSERT_VALUES_EQUAL(Complete(engine, {"SE"}), expected);
445445
}
446446

447447
Y_UNIT_TEST(InvalidStatementsRecovery) {
448448
auto engine = MakeSqlCompletionEngineUT();
449-
UNIT_ASSERT_GE(Complete(engine, "select select; ").size(), 35);
450-
UNIT_ASSERT_GE(Complete(engine, "select select;").size(), 35);
451-
UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, "!;").size(), 0, "Lexer failing");
449+
UNIT_ASSERT_GE(Complete(engine, {"select select; "}).size(), 35);
450+
UNIT_ASSERT_GE(Complete(engine, {"select select;"}).size(), 35);
451+
UNIT_ASSERT_VALUES_EQUAL_C(Complete(engine, {"!;"}).size(), 0, "Lexer failing");
452+
}
453+
454+
// Checks which cursor positions are accepted and which are rejected with
// yexception, for an empty text, a one-byte text and a two-byte text.
Y_UNIT_TEST(InvalidCursorPosition) {
455+
auto engine = MakeSqlCompletionEngineUT();
456+
457+
// Empty text: position 0 is accepted, position 1 is rejected.
UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"", 0}));
458+
UNIT_ASSERT_EXCEPTION(Complete(engine, {"", 1}), yexception);
459+
460+
// Single ASCII character: both the start and the end are accepted.
UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"s", 0}));
461+
UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"s", 1}));
462+
463+
// "ы" takes two bytes in UTF-8; position 1 is expected to throw while
// positions 0 and 2 are accepted.
// NOTE(review): presumably the position is a byte offset and 1 falls
// between the two bytes of the character - confirm against the engine.
UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"ы", 0}));
464+
UNIT_ASSERT_EXCEPTION(Complete(engine, {"ы", 1}), yexception);
465+
UNIT_ASSERT_NO_EXCEPTION(Complete(engine, {"ы", 2}));
452466
}
453467

454468
Y_UNIT_TEST(DefaultNameService) {

yql/essentials/tools/yql_complete/ya.make

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ PEERDIR(
77
yql/essentials/sql/v1/complete
88
yql/essentials/sql/v1/lexer/antlr4_pure
99
yql/essentials/sql/v1/lexer/antlr4_pure_ansi
10+
yql/essentials/utils
1011
)
1112

1213
SRCS(

yql/essentials/tools/yql_complete/yql_complete.cpp

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,11 @@
66
#include <yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
77
#include <yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
88

9+
#include <yql/essentials/utils/utf8.h>
10+
911
#include <library/cpp/getopt/last_getopt.h>
12+
13+
#include <util/charset/utf8.h>
1014
#include <util/stream/file.h>
1115

1216
NSQLComplete::TFrequencyData LoadFrequencyDataFromFile(TString filepath) {
@@ -25,6 +29,11 @@ NSQLComplete::TLexerSupplier MakePureLexerSupplier() {
2529
};
2630
}
2731

32+
// Returns the distance from the beginning of `text` to the beginning of the
// substring that SubstrUTF8 yields for `position`.
// NOTE(review): presumably `position` is counted in UTF-8 characters and the
// result is the matching byte offset - confirm against SubstrUTF8.
size_t UTF8PositionToBytes(const TStringBuf text, size_t position) {
33+
// text.length() is passed as the length argument of SubstrUTF8.
const TStringBuf substr = SubstrUTF8(text, position, text.length());
34+
return substr.begin() - text.begin();
35+
}
36+
2837
int Run(int argc, char* argv[]) {
2938
NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
3039

@@ -60,9 +69,20 @@ int Run(int argc, char* argv[]) {
6069
std::move(ranking)));
6170

6271
NSQLComplete::TCompletionInput input;
72+
6373
input.Text = queryString;
74+
if (!NYql::IsUtf8(input.Text)) {
75+
ythrow yexception() << "provided input is not UTF encoded";
76+
}
77+
6478
if (pos) {
65-
input.CursorPosition = *pos;
79+
input.CursorPosition = UTF8PositionToBytes(input.Text, *pos);
80+
} else if (Count(input.Text, '#') == 1) {
81+
Cerr << "Note: found an only '#', setting the cursor position\n";
82+
input.CursorPosition = input.Text.find('#');
83+
} else if (Count(input.Text, '#') >= 2) {
84+
Cerr << "Note: found multiple '#', defaulting the cursor position\n";
85+
input.CursorPosition = queryString.size();
6686
} else {
6787
input.CursorPosition = queryString.size();
6888
}

0 commit comments

Comments
 (0)