Skip to content

Commit e1a5fd3

Browse files
committed
Use must-rules as much as possible for better parse errors
1 parent f1c61cc commit e1a5fd3

File tree

6 files changed

+178
-189
lines changed

6 files changed

+178
-189
lines changed

GraphQLGrammar.cpp

Lines changed: 36 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
11
#include "GraphQLGrammar.h"
22

3+
#include <tao/pegtl/contrib/unescape.hpp>
4+
35
#include <graphqlparser/Ast.h>
46

57
#include <memory>
6-
#include <iostream>
78
#include <stack>
89
#include <tuple>
9-
#include <codecvt>
10-
#include <locale>
10+
11+
#include <cstdio>
1112

1213
namespace facebook {
1314
namespace graphql {
@@ -26,7 +27,7 @@ struct parser_state
2627
std::unique_ptr<ast::SelectionSet> selectionSet;
2728

2829
std::string enumValue;
29-
std::ostringstream stringBuffer;
30+
std::string stringBuffer;
3031

3132
std::unique_ptr<ast::Name> name;
3233
std::unique_ptr<ast::Name> aliasName;
@@ -222,16 +223,10 @@ struct build_ast<grammar::escaped_unicode>
222223
template <typename _Input>
223224
static void apply(const _Input& in, parser_state& state)
224225
{
225-
std::wstring source;
226-
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utf8conv;
227-
std::istringstream encoded(in.string());
228-
uint32_t wch;
229-
230-
// Skip past the first 'u' character
231-
encoded.seekg(1);
232-
encoded >> std::hex >> wch;
233-
source.push_back(static_cast<wchar_t>(wch));
234-
state.stringBuffer << utf8conv.to_bytes(source);
226+
if (!unescape::utf8_append_utf32(state.stringBuffer, unescape::unhex_string<uint32_t>(in.begin() + 1, in.end())))
227+
{
228+
throw parse_error("invalid escaped unicode code point", in);
229+
}
235230
}
236231
};
237232

@@ -246,40 +241,39 @@ struct build_ast<grammar::escaped_char>
246241
switch (ch)
247242
{
248243
case '"':
249-
state.stringBuffer << '"';
244+
state.stringBuffer += '"';
250245
break;
251246

252247
case '\\':
253-
state.stringBuffer << '\\';
248+
state.stringBuffer += '\\';
254249
break;
255250

256251
case '/':
257-
state.stringBuffer << '/';
252+
state.stringBuffer += '/';
258253
break;
259254

260255
case 'b':
261-
state.stringBuffer << '\b';
256+
state.stringBuffer += '\b';
262257
break;
263258

264259
case 'f':
265-
state.stringBuffer << '\f';
260+
state.stringBuffer += '\f';
266261
break;
267262

268263
case 'n':
269-
state.stringBuffer << '\n';
264+
state.stringBuffer += '\n';
270265
break;
271266

272267
case 'r':
273-
state.stringBuffer << '\r';
268+
state.stringBuffer += '\r';
274269
break;
275270

276271
case 't':
277-
state.stringBuffer << '\t';
272+
state.stringBuffer += '\t';
278273
break;
279274

280275
default:
281-
state.stringBuffer << '\\' << ch;
282-
break;
276+
throw parse_error("invalid escaped character sequence", in);
283277
}
284278
}
285279
};
@@ -290,7 +284,7 @@ struct build_ast<grammar::string_quote_character>
290284
template <typename _Input>
291285
static void apply(const _Input& in, parser_state& state)
292286
{
293-
state.stringBuffer << in.peek_char();
287+
state.stringBuffer += in.peek_char();
294288
}
295289
};
296290

@@ -301,7 +295,7 @@ struct build_ast<grammar::block_escape_sequence>
301295
template <typename _Input>
302296
static void apply(const _Input& in, parser_state& state)
303297
{
304-
state.stringBuffer << R"bq(""")bq";
298+
state.stringBuffer.append(R"bq(""")bq");
305299
}
306300
};
307301

@@ -311,7 +305,7 @@ struct build_ast<grammar::block_quote_character>
311305
template <typename _Input>
312306
static void apply(const _Input& in, parser_state& state)
313307
{
314-
state.stringBuffer << in.peek_char();
308+
state.stringBuffer += in.peek_char();
315309
}
316310
};
317311

@@ -321,13 +315,12 @@ struct build_ast<grammar::string_value>
321315
template <typename _Input>
322316
static void apply(const _Input& in, parser_state& state)
323317
{
324-
auto parsedValue = state.stringBuffer.str();
318+
auto parsedValue = std::move(state.stringBuffer);
325319
std::unique_ptr<char[], ast::CDeleter> value(reinterpret_cast<char*>(malloc(parsedValue.size() + 1)));
326320

327321
memmove(value.get(), parsedValue.data(), parsedValue.size());
328322
value[parsedValue.size()] = '\0';
329323
state.value.push(std::unique_ptr<ast::Value>(new ast::StringValue(get_location(in), value.release())));
330-
state.stringBuffer.str("");
331324
}
332325
};
333326

@@ -1096,11 +1089,23 @@ std::unique_ptr<ast::Node> parseString(const char* text)
10961089
return std::unique_ptr<ast::Node>(document.release());
10971090
}
10981091

1099-
std::unique_ptr<ast::Node> parseFile(FILE* file)
1092+
std::unique_ptr<ast::Node> parseFile(const char* fileName)
1093+
{
1094+
parser_state state;
1095+
std::unique_ptr<ast::Document> document;
1096+
file_input<> in(fileName);
1097+
1098+
state.document = &document;
1099+
parse<grammar::document, build_ast>(std::move(in), std::move(state));
1100+
1101+
return std::unique_ptr<ast::Node>(document.release());
1102+
}
1103+
1104+
std::unique_ptr<ast::Node> parseInput()
11001105
{
11011106
parser_state state;
11021107
std::unique_ptr<ast::Document> document;
1103-
file_input<> in(file, "GraphQL");
1108+
cstream_input<> in(stdin, 1024 * 1024, "GraphQL");
11041109

11051110
state.document = &document;
11061111
parse<grammar::document, build_ast>(std::move(in), std::move(state));

0 commit comments

Comments
 (0)