Skip to content

Commit a4dd750

Browse files
committed
Re-simplify by calling unescape_j::apply directly
1 parent a41cf2e commit a4dd750

File tree

3 files changed: +41 / -130 lines changed

include/graphqlservice/GraphQLGrammar.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,11 @@ struct backslash_token : one<'\\'>
101101
{
102102
};
103103

104-
struct escaped_unicode_content : rep<4, xdigit>
104+
struct escaped_unicode_codepoint : rep<4, xdigit>
105+
{
106+
};
107+
108+
struct escaped_unicode_content : list<escaped_unicode_codepoint, seq<backslash_token, one<'u'>>>
105109
{
106110
};
107111

include/graphqlservice/GraphQLTree.h

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
#include <tao/pegtl.hpp>
1414
#include <tao/pegtl/contrib/parse_tree.hpp>
1515

16-
#include <list>
1716
#include <string>
1817
#include <string_view>
1918
#include <variant>
@@ -27,10 +26,7 @@ struct ast_node : parse_tree::basic_node<ast_node>
2726
{
2827
GRAPHQLPEG_EXPORT std::string_view unescaped_view() const;
2928

30-
using string_or_utf16 = std::variant<std::string_view, std::uint16_t>;
31-
32-
std::variant<std::string_view, std::uint16_t, std::list<string_or_utf16>, std::string>
33-
unescaped;
29+
std::variant<std::string_view, std::string> unescaped;
3430
};
3531

3632
struct ast_input

src/GraphQLTree.cpp

Lines changed: 35 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -19,100 +19,53 @@ namespace peg {
1919

2020
std::string_view ast_node::unescaped_view() const
2121
{
22-
if (std::holds_alternative<std::uint16_t>(unescaped))
23-
{
24-
// The whole string_value was a single unicode character.
25-
std::string utf8;
22+
auto result = std::visit(
23+
[](const auto& value) noexcept {
24+
return std::string_view { value };
25+
},
26+
unescaped);
2627

27-
if (unescape::utf8_append_utf32(utf8, std::get<std::uint16_t>(unescaped)))
28-
{
29-
const_cast<ast_node*>(this)->unescaped = std::move(utf8);
30-
}
31-
else
32-
{
33-
throw parse_error("invalid escaped unicode code point", this->begin());
34-
}
35-
}
36-
else if (std::holds_alternative<std::list<string_or_utf16>>(unescaped))
28+
if (result.empty())
3729
{
38-
// First convert all of the consecutive unicode sequences to UTF-8 strings together.
39-
auto& values = std::get<std::list<string_or_utf16>>(const_cast<ast_node*>(this)->unescaped);
40-
const auto isUtf16 = [](const string_or_utf16& value) noexcept {
41-
return std::holds_alternative<std::uint16_t>(value);
42-
};
43-
auto itrStart = std::find_if(values.begin(), values.end(), isUtf16);
44-
auto itrEnd = std::find_if_not(itrStart, values.end(), isUtf16);
45-
std::list<std::string> utf8;
46-
47-
if (itrStart != itrEnd)
30+
if (children.size() > 1)
4831
{
49-
while (itrStart != itrEnd)
50-
{
51-
std::string unescaped;
52-
53-
// Translate surrogate pairs (based on unescape::unescape_j from PEGTL)
54-
for (auto itr = itrStart; itr != itrEnd; ++itr)
55-
{
56-
const auto c = std::get<std::uint16_t>(*itr);
57-
58-
if ((0xd800 <= c) && (c <= 0xdbff) && ++itr != itrEnd)
59-
{
60-
const auto d = std::get<std::uint16_t>(*itr);
61-
62-
if ((0xdc00 <= d) && (d <= 0xdfff))
63-
{
64-
(void)unescape::utf8_append_utf32(unescaped,
65-
(((c & 0x03ff) << 10) | (d & 0x03ff)) + 0x10000);
66-
continue;
67-
}
68-
}
69-
70-
if (!unescape::utf8_append_utf32(unescaped, c))
71-
{
72-
throw parse_error("invalid escaped unicode code point", this->begin());
73-
}
74-
}
75-
76-
utf8.push_back(std::move(unescaped));
77-
78-
values.erase(itrStart, itrEnd);
79-
values.insert(itrEnd, std::string_view { utf8.back() });
80-
81-
itrStart = std::find_if(itrEnd, values.end(), isUtf16);
82-
itrEnd = std::find_if_not(itrStart, values.end(), isUtf16);
83-
}
84-
}
32+
std::string joined;
8533

86-
// If the string_value had multiple unescaped sub-strings, concatenate them on
87-
// demand and store the result as a std::string.
88-
std::string joined;
34+
joined.reserve(std::accumulate(children.cbegin(),
35+
children.cend(),
36+
size_t(0),
37+
[](size_t total, const std::unique_ptr<ast_node>& child) {
38+
return total + child->unescaped_view().size();
39+
}));
8940

90-
joined.reserve(std::accumulate(values.cbegin(),
91-
values.cend(),
92-
size_t(0),
93-
[](size_t total, const auto& child) {
94-
return total + std::get<std::string_view>(child).size();
95-
}));
41+
for (const auto& child : children)
42+
{
43+
joined.append(child->unescaped_view());
44+
}
9645

97-
for (const auto& child : values)
46+
const_cast<ast_node*>(this)->unescaped = std::move(joined);
47+
result = std::get<std::string>(unescaped);
48+
}
49+
else if (!children.empty())
9850
{
99-
joined.append(std::get<std::string_view>(child));
51+
const_cast<ast_node*>(this)->unescaped = children.front()->unescaped_view();
52+
result = std::get<std::string_view>(unescaped);
10053
}
54+
else if (has_content() && is_type<escaped_unicode>())
55+
{
56+
const auto content = string_view();
57+
memory_input<> in(content.data(), content.size(), "escaped unicode");
58+
std::string utf8;
10159

102-
const_cast<ast_node*>(this)->unescaped = std::move(joined);
103-
}
60+
utf8.reserve((content.size() + 1) / 2);
61+
unescape::unescape_j::apply(in, utf8);
10462

105-
// By this point it should always be a std::string_view or a std::string.
106-
if (std::holds_alternative<std::string_view>(unescaped))
107-
{
108-
return std::get<std::string_view>(unescaped);
109-
}
110-
else if (std::holds_alternative<std::string>(unescaped))
111-
{
112-
return std::get<std::string>(unescaped);
63+
const_cast<ast_node*>(this)->unescaped = std::move(utf8);
64+
result = std::get<std::string>(unescaped);
65+
}
11366
}
11467

115-
throw parse_error("unexpected sub-string", this->begin());
68+
return result;
11669
}
11770

11871
using namespace tao::graphqlpeg;
@@ -165,20 +118,6 @@ struct ast_selector<float_value> : std::true_type
165118
template <>
166119
struct ast_selector<escaped_unicode> : std::true_type
167120
{
168-
static void transform(std::unique_ptr<ast_node>& n)
169-
{
170-
if (n->has_content())
171-
{
172-
auto content = n->string_view();
173-
174-
n->unescaped = unescape::unhex_string<uint16_t>(content.data() + 1,
175-
content.data() + content.size());
176-
177-
return;
178-
}
179-
180-
throw parse_error("invalid escaped unicode code point", n->begin());
181-
}
182121
};
183122

184123
template <>
@@ -263,34 +202,6 @@ struct ast_selector<block_quote_character> : std::true_type
263202
template <>
264203
struct ast_selector<string_value> : std::true_type
265204
{
266-
static void transform(std::unique_ptr<ast_node>& n)
267-
{
268-
if (!n->children.empty())
269-
{
270-
if (n->children.size() > 1)
271-
{
272-
std::list<ast_node::string_or_utf16> unescaped;
273-
274-
std::transform(n->children.cbegin(),
275-
n->children.cend(),
276-
std::back_inserter(unescaped),
277-
[](const auto& child) -> ast_node::string_or_utf16 {
278-
if (std::holds_alternative<std::uint16_t>(child->unescaped))
279-
{
280-
return { std::get<std::uint16_t>(child->unescaped) };
281-
}
282-
283-
return { child->unescaped_view() };
284-
});
285-
286-
n->unescaped = std::move(unescaped);
287-
}
288-
else
289-
{
290-
n->unescaped = std::move(n->children.front()->unescaped);
291-
}
292-
}
293-
}
294205
};
295206

296207
template <>

0 commit comments

Comments
 (0)