Skip to content

Commit 84d527f

Browse files
authored
split proto_ast by antlr version (#9219)
1 parent ba01fbb commit 84d527f

File tree

31 files changed

+372
-342
lines changed

31 files changed

+372
-342
lines changed

ydb/library/yql/minikql/jsonpath/jsonpath.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include <ydb/library/yql/parser/proto_ast/gen/jsonpath/JsonPathLexer.h>
1111
#include <ydb/library/yql/parser/proto_ast/gen/jsonpath/JsonPathParser.h>
1212
#include <ydb/library/yql/parser/proto_ast/gen/jsonpath/JsonPathParser.pb.h>
13-
#include <ydb/library/yql/parser/proto_ast/proto_ast.h>
13+
#include <ydb/library/yql/parser/proto_ast/antlr3/proto_ast_antlr3.h>
1414

1515
#include <google/protobuf/message.h>
1616

@@ -64,7 +64,7 @@ const TAstNodePtr ParseJsonPathAst(const TStringBuf path, TIssues& issues, size_
6464
#if defined(_tsan_enabled_)
6565
TGuard<TMutex> guard(SanitizerJsonPathTranslationMutex);
6666
#endif
67-
NProtoAST::TProtoASTBuilder<NALP::JsonPathParser, NALP::JsonPathLexer> builder(path, "JsonPath", &arena);
67+
NProtoAST::TProtoASTBuilder3<NALP::JsonPathParser, NALP::JsonPathLexer> builder(path, "JsonPath", &arena);
6868
TParseErrorsCollector collector(issues, maxParseErrors);
6969
rawAst = builder.BuildAST(collector);
7070
}

ydb/library/yql/minikql/jsonpath/ya.make

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ PEERDIR(
3434
ydb/library/yql/ast
3535
ydb/library/yql/utils
3636
ydb/library/yql/core/issue/protos
37-
ydb/library/yql/parser/proto_ast
37+
ydb/library/yql/parser/proto_ast/antlr3
3838
ydb/library/yql/parser/proto_ast/gen/jsonpath
3939
)
4040

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#include "proto_ast_antlr3.h"
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#pragma once
2+
3+
#include <ydb/library/yql/parser/proto_ast/common.h>
4+
5+
#include <contrib/libs/antlr3_cpp_runtime/include/antlr3.hpp>
6+
7+
namespace NProtoAST {
8+
9+
template <typename TParser, typename TLexer>
10+
class TProtoASTBuilder3 {
11+
typedef ANTLR_UINT8 TChar;
12+
13+
public:
14+
TProtoASTBuilder3(TStringBuf data, const TString& queryName = "query", google::protobuf::Arena* arena = nullptr)
15+
: QueryName(queryName)
16+
, InputStream((const TChar*)data.data(), antlr3::ENC_8BIT, data.length(), (TChar*)QueryName.begin()) // Why the hell antlr needs non-const ptr??
17+
, Lexer(&InputStream, static_cast<google::protobuf::Arena*>(nullptr))
18+
, TokenStream(ANTLR_SIZE_HINT, Lexer.get_tokSource())
19+
, Parser(&TokenStream, arena)
20+
{
21+
}
22+
23+
google::protobuf::Message* BuildAST(IErrorCollector& errors) {
24+
// TODO: find a better way to break on lexer errors
25+
try {
26+
Lexer.ReportErrors(&errors);
27+
return Parser.Parse(Lexer, &errors);
28+
} catch (const TTooManyErrors&) {
29+
return nullptr;
30+
} catch (...) {
31+
errors.Error(0, 0, CurrentExceptionMessage());
32+
return nullptr;
33+
}
34+
}
35+
36+
private:
37+
TString QueryName;
38+
39+
typename TLexer::InputStreamType InputStream;
40+
TLexer Lexer;
41+
42+
typename TParser::TokenStreamType TokenStream;
43+
TParser Parser;
44+
};
45+
46+
template <typename TLexer>
47+
class TLexerTokensCollector3 {
48+
typedef ANTLR_UINT8 TChar;
49+
50+
public:
51+
TLexerTokensCollector3(TStringBuf data, const char** tokenNames, const TString& queryName = "query")
52+
: TokenNames(tokenNames)
53+
, QueryName(queryName)
54+
, InputStream((const TChar*)data.data(), antlr3::ENC_8BIT, data.length(), (TChar*)QueryName.begin())
55+
, Lexer(&InputStream, static_cast<google::protobuf::Arena*>(nullptr))
56+
{
57+
}
58+
59+
void CollectTokens(IErrorCollector& errors, const NSQLTranslation::ILexer::TTokenCallback& onNextToken) {
60+
try {
61+
Lexer.ReportErrors(&errors);
62+
auto src = Lexer.get_tokSource();
63+
for (;;) {
64+
auto token = src->nextToken();
65+
auto type = token->getType();
66+
const bool isEOF = type == TLexer::CommonTokenType::TOKEN_EOF;
67+
NSQLTranslation::TParsedToken last;
68+
last.Name = isEOF ? "EOF" : TokenNames[type];
69+
last.Content = token->getText();
70+
last.Line = token->get_line();
71+
last.LinePos = token->get_charPositionInLine();
72+
onNextToken(std::move(last));
73+
if (isEOF) {
74+
break;
75+
}
76+
}
77+
} catch (const TTooManyErrors&) {
78+
} catch (...) {
79+
errors.Error(0, 0, CurrentExceptionMessage());
80+
}
81+
}
82+
83+
private:
84+
const char** TokenNames;
85+
TString QueryName;
86+
typename TLexer::InputStreamType InputStream;
87+
TLexer Lexer;
88+
};
89+
} // namespace NProtoAST
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
LIBRARY()
2+
3+
PEERDIR(
4+
ydb/library/yql/parser/proto_ast
5+
contrib/libs/antlr3_cpp_runtime
6+
)
7+
8+
SRCS(
9+
proto_ast_antlr3.cpp
10+
)
11+
12+
END()
13+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#include "proto_ast_antlr4.h"
2+
3+
antlr4::YqlErrorListener::YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error)
4+
: errors(errors), error(error)
5+
{
6+
}
7+
8+
void antlr4::YqlErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/,
9+
size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) {
10+
*error = true;
11+
errors->Error(line, charPositionInLine, msg.c_str());
12+
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#pragma once
2+
3+
#include <ydb/library/yql/parser/proto_ast/common.h>
4+
5+
#include <contrib/libs/antlr4_cpp_runtime/src/antlr4-runtime.h>
6+
7+
namespace antlr4 {
8+
// ANTLR4 error listener constructed from an NProtoAST::IErrorCollector and
// a caller-provided error flag; both are held as raw pointers.
class ANTLR4CPP_PUBLIC YqlErrorListener : public BaseErrorListener {
private:
    NProtoAST::IErrorCollector* errors;
    bool* error;

public:
    YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error);

    // Overrides the BaseErrorListener syntax-error callback.
    void syntaxError(
        Recognizer* recognizer,
        Token* offendingSymbol,
        size_t line,
        size_t charPositionInLine,
        const std::string& msg,
        std::exception_ptr e) override;
};
17+
}
18+
19+
namespace NProtoAST {
20+
template <>
21+
inline void InvalidToken<antlr4::Token>(IOutputStream& err, const antlr4::Token* token) {
22+
if (token) {
23+
if (token->getInputStream()) {
24+
err << " '" << token->getText() << "'";
25+
} else {
26+
err << ABSENCE;
27+
}
28+
}
29+
}
30+
31+
template <typename TParser, typename TLexer>
32+
class TProtoASTBuilder4 {
33+
34+
public:
35+
TProtoASTBuilder4(TStringBuf data, const TString& queryName = "query", google::protobuf::Arena* arena = nullptr)
36+
: QueryName(queryName)
37+
, InputStream(data)
38+
, Lexer(&InputStream)
39+
, TokenStream(&Lexer)
40+
, Parser(&TokenStream, arena)
41+
{
42+
}
43+
44+
google::protobuf::Message* BuildAST(IErrorCollector& errors) {
45+
// TODO: find a better way to break on lexer errors
46+
typename antlr4::YqlErrorListener listener(&errors, &Parser.error);
47+
Parser.removeErrorListeners();
48+
Parser.addErrorListener(&listener);
49+
try {
50+
auto result = Parser.Parse(&errors);
51+
Parser.removeErrorListener(&listener);
52+
Parser.error = false;
53+
return result;
54+
} catch (const TTooManyErrors&) {
55+
Parser.removeErrorListener(&listener);
56+
Parser.error = false;
57+
return nullptr;
58+
} catch (...) {
59+
errors.Error(0, 0, CurrentExceptionMessage());
60+
Parser.removeErrorListener(&listener);
61+
Parser.error = false;
62+
return nullptr;
63+
}
64+
}
65+
66+
private:
67+
TString QueryName;
68+
69+
antlr4::ANTLRInputStream InputStream;
70+
TLexer Lexer;
71+
72+
antlr4::CommonTokenStream TokenStream;
73+
TParser Parser;
74+
};
75+
76+
template <typename TLexer>
77+
class TLexerTokensCollector4 {
78+
79+
public:
80+
TLexerTokensCollector4(TStringBuf data, const TString& queryName = "query")
81+
: QueryName(queryName)
82+
, InputStream(std::string(data))
83+
, Lexer(&InputStream)
84+
{
85+
}
86+
87+
void CollectTokens(IErrorCollector& errors, const NSQLTranslation::ILexer::TTokenCallback& onNextToken) {
88+
try {
89+
for (;;) {
90+
auto token = Lexer.nextToken();
91+
auto type = token->getType();
92+
const bool isEOF = type == TLexer::EOF;
93+
NSQLTranslation::TParsedToken last;
94+
last.Name = GetTokenName(type);
95+
last.Content = token->getText();
96+
last.Line = token->getLine();
97+
last.LinePos = token->getCharPositionInLine();
98+
onNextToken(std::move(last));
99+
if (isEOF) {
100+
break;
101+
}
102+
}
103+
} catch (const TTooManyErrors&) {
104+
} catch (...) {
105+
errors.Error(0, 0, CurrentExceptionMessage());
106+
}
107+
}
108+
109+
private:
110+
TString GetTokenName(size_t type) const {
111+
auto res = Lexer.getVocabulary().getSymbolicName(type);
112+
if (res != ""){
113+
return TString(res);
114+
}
115+
return TString(INVALID_TOKEN_NAME);
116+
}
117+
118+
TString QueryName;
119+
antlr4::ANTLRInputStream InputStream;
120+
TLexer Lexer;
121+
};
122+
} // namespace NProtoAST
123+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
LIBRARY()
2+
3+
PEERDIR(
4+
ydb/library/yql/parser/proto_ast
5+
contrib/libs/antlr4_cpp_runtime
6+
)
7+
8+
SRCS(
9+
proto_ast_antlr4.cpp
10+
)
11+
12+
END()
13+
14+

ydb/library/yql/parser/proto_ast/collect_issues/collect_issues.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#pragma once
22

3-
#include <ydb/library/yql/parser/proto_ast/proto_ast.h>
3+
#include <ydb/library/yql/parser/proto_ast/common.h>
44
#include <ydb/library/yql/public/issue/yql_issue.h>
55

66
namespace NSQLTranslation {

ydb/library/yql/parser/proto_ast/proto_ast.cpp renamed to ydb/library/yql/parser/proto_ast/common.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
#include "proto_ast.h"
1+
#include "common.h"
22

3-
using namespace NProtoAST;
3+
namespace NProtoAST {
44

55
IErrorCollector::IErrorCollector(size_t maxErrors)
66
: MaxErrors(maxErrors)
@@ -44,13 +44,4 @@ void TErrorOutput::AddError(ui32 line, ui32 col, const TString& message) {
4444
Err << "Line " << line << " column " << col << " error: " << message;
4545
}
4646

47-
antlr4::YqlErrorListener::YqlErrorListener(NProtoAST::IErrorCollector* errors, bool* error)
48-
: errors(errors), error(error)
49-
{
5047
}
51-
52-
void antlr4::YqlErrorListener::syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/,
53-
size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr /*e*/) {
54-
*error = true;
55-
errors->Error(line, charPositionInLine, msg.c_str());
56-
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#pragma once
2+
3+
#include <ydb/library/yql/parser/lexer_common/lexer.h>
4+
5+
#include <google/protobuf/message.h>
6+
#include <util/generic/ptr.h>
7+
#include <util/generic/vector.h>
8+
#include <util/charset/utf8.h>
9+
10+
namespace NProtoAST {
11+
// Name used for a token type that has no symbolic name.
// constexpr makes the pointer itself immutable: a header-level constant must
// not be a per-translation-unit mutable variable read from inline/template code.
static constexpr const char* INVALID_TOKEN_NAME = "nothing";
// Text written instead of the token text when an invalid token has no input.
static constexpr const char* ABSENCE = " absence";
13+
14+
template <typename InputType>
15+
void InvalidCharacter(IOutputStream& err, const InputType* input) {
16+
wchar32 rune = 0;
17+
size_t runeLen = 0;
18+
auto begin = input->get_nextChar();
19+
auto end = begin + input->get_sizeBuf();
20+
if (begin != end && SafeReadUTF8Char(rune, runeLen, begin, end) == RECODE_OK) {
21+
err << " '" << TStringBuf((const char*)begin, runeLen) << "' (Unicode character <" << ui32(rune) << ">)";
22+
}
23+
}
24+
25+
26+
template <typename TokenType>
27+
inline void InvalidToken(IOutputStream& err, const TokenType* token) {
28+
if (token) {
29+
if (token->get_input()) {
30+
err << " '" << token->getText() << "'";
31+
} else {
32+
err << ABSENCE;
33+
}
34+
}
35+
}
36+
37+
// Exception type documented as being thrown by IErrorCollector::Error().
class TTooManyErrors : public yexception {};
39+
40+
class IErrorCollector {
41+
public:
42+
explicit IErrorCollector(size_t maxErrors);
43+
virtual ~IErrorCollector();
44+
45+
// throws TTooManyErrors
46+
void Error(ui32 line, ui32 col, const TString& message);
47+
48+
private:
49+
virtual void AddError(ui32 line, ui32 col, const TString& message) = 0;
50+
51+
protected:
52+
const size_t MaxErrors;
53+
size_t NumErrors;
54+
};
55+
56+
class TErrorOutput: public IErrorCollector {
57+
public:
58+
TErrorOutput(IOutputStream& err, const TString& name, size_t maxErrors);
59+
virtual ~TErrorOutput();
60+
61+
private:
62+
void AddError(ui32 line, ui32 col, const TString& message) override;
63+
64+
public:
65+
IOutputStream& Err;
66+
TString Name;
67+
};
68+
69+
} // namespace NProtoAST

ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ headerFile( LEXER,
278278
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
279279
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
280280

281-
#include \<ydb/library/yql/parser/proto_ast/proto_ast.h>
281+
#include \<ydb/library/yql/parser/proto_ast/antlr3/proto_ast_antlr3.h>
282282

283283
/* =============================================================================
284284
* Standard antlr3 C++ runtime definitions

0 commit comments

Comments
 (0)