@@ -19,100 +19,53 @@ namespace peg {
19
19
20
20
// Rendered diff of ast_node::unescaped_view(): lines starting with '-' are the
// removed implementation, lines starting with '+' are its replacement, and the
// bare numbers are old/new gutter line numbers from the diff view.
//
// Replacement behavior (the '+' and unmarked lines):
//   1. std::visit converts whichever alternative `unescaped` currently holds
//      into a std::string_view; a non-empty view is returned as-is.
//   2. If that view is empty and the node has more than one child, every
//      child's unescaped_view() is appended to a std::string (capacity is
//      reserved from the summed child sizes first), the string is stored in
//      `unescaped`, and a view of the stored string is returned.
//   3. If there is exactly one child, that child's unescaped_view() is stored
//      in `unescaped` and returned.
//   4. If there are no children but has_content() && is_type<escaped_unicode>(),
//      the node's string_view() is wrapped in a memory_input and passed to
//      unescape::unescape_j::apply together with a std::string; that string is
//      then stored in `unescaped` and returned.
//   5. Otherwise the empty view is returned.
//
// The method is const, so every store into `unescaped` goes through
// const_cast<ast_node*>(this).
// NOTE(review): an empty view doubles as the "not computed yet" signal, so a
// node whose computed text is empty repeats the work on each call.
// NOTE(review): no synchronization is visible around these stores - confirm
// that callers do not invoke this concurrently on the same node.
std::string_view ast_node::unescaped_view () const
21
21
{
22
- if (std::holds_alternative<std::uint16_t >(unescaped))
23
- {
24
- // The whole string_value was a single unicode character.
25
- std::string utf8;
22
// Start from whatever `unescaped` already holds, viewed as a std::string_view.
+ auto result = std::visit (
23
+ [](const auto & value) noexcept {
24
+ return std::string_view { value };
25
+ },
26
+ unescaped);
26
27
27
- if (unescape::utf8_append_utf32 (utf8, std::get<std::uint16_t >(unescaped)))
28
- {
29
- const_cast <ast_node*>(this )->unescaped = std::move (utf8);
30
- }
31
- else
32
- {
33
- throw parse_error (" invalid escaped unicode code point" , this ->begin ());
34
- }
35
- }
36
- else if (std::holds_alternative<std::list<string_or_utf16>>(unescaped))
28
+ if (result.empty ())
37
29
{
38
- // First convert all of the consecutive unicode sequences to UTF-8 strings together.
39
- auto & values = std::get<std::list<string_or_utf16>>(const_cast <ast_node*>(this )->unescaped );
40
- const auto isUtf16 = [](const string_or_utf16& value) noexcept {
41
- return std::holds_alternative<std::uint16_t >(value);
42
- };
43
- auto itrStart = std::find_if (values.begin (), values.end (), isUtf16);
44
- auto itrEnd = std::find_if_not (itrStart, values.end (), isUtf16);
45
- std::list<std::string> utf8;
46
-
47
- if (itrStart != itrEnd)
30
+ if (children.size () > 1 )
48
31
{
49
- while (itrStart != itrEnd)
50
- {
51
- std::string unescaped;
52
-
53
- // Translate surrogate pairs (based on unescape::unescape_j from PEGTL)
54
- for (auto itr = itrStart; itr != itrEnd; ++itr)
55
- {
56
- const auto c = std::get<std::uint16_t >(*itr);
57
-
58
- if ((0xd800 <= c) && (c <= 0xdbff ) && ++itr != itrEnd)
59
- {
60
- const auto d = std::get<std::uint16_t >(*itr);
61
-
62
- if ((0xdc00 <= d) && (d <= 0xdfff ))
63
- {
64
- (void )unescape::utf8_append_utf32 (unescaped,
65
- (((c & 0x03ff ) << 10 ) | (d & 0x03ff )) + 0x10000 );
66
- continue ;
67
- }
68
- }
69
-
70
- if (!unescape::utf8_append_utf32 (unescaped, c))
71
- {
72
- throw parse_error (" invalid escaped unicode code point" , this ->begin ());
73
- }
74
- }
75
-
76
- utf8.push_back (std::move (unescaped));
77
-
78
- values.erase (itrStart, itrEnd);
79
- values.insert (itrEnd, std::string_view { utf8.back () });
80
-
81
- itrStart = std::find_if (itrEnd, values.end (), isUtf16);
82
- itrEnd = std::find_if_not (itrStart, values.end (), isUtf16);
83
- }
84
- }
32
+ std::string joined;
85
33
86
- // If the string_value had multiple unescaped sub-strings, concatenate them on
87
- // demand and store the result as a std::string.
88
- std::string joined;
34
// Size the buffer once from the summed lengths of the children's views.
+ joined.reserve (std::accumulate (children.cbegin (),
35
+ children.cend (),
36
+ size_t (0 ),
37
+ [](size_t total, const std::unique_ptr<ast_node>& child) {
38
+ return total + child->unescaped_view ().size ();
39
+ }));
89
40
90
- joined.reserve (std::accumulate (values.cbegin (),
91
- values.cend (),
92
- size_t (0 ),
93
- [](size_t total, const auto & child) {
94
- return total + std::get<std::string_view>(child).size ();
95
- }));
41
+ for (const auto & child : children)
42
+ {
43
+ joined.append (child->unescaped_view ());
44
+ }
96
45
97
- for (const auto & child : values)
46
+ const_cast <ast_node*>(this )->unescaped = std::move (joined);
47
+ result = std::get<std::string>(unescaped);
48
+ }
49
+ else if (!children.empty ())
98
50
{
99
- joined.append (std::get<std::string_view>(child));
51
+ const_cast <ast_node*>(this )->unescaped = children.front ()->unescaped_view ();
52
+ result = std::get<std::string_view>(unescaped);
100
53
}
54
+ else if (has_content () && is_type<escaped_unicode>())
55
+ {
56
+ const auto content = string_view ();
57
+ memory_input<> in (content.data (), content.size (), " escaped unicode" );
58
+ std::string utf8;
101
59
102
- const_cast <ast_node*>( this )-> unescaped = std::move (joined );
103
- }
60
// Reserve about half of the escaped text's length before unescaping.
+ utf8. reserve ((content. size () + 1 ) / 2 );
61
+ unescape::unescape_j::apply (in, utf8);
104
62
105
- // By this point it should always be a std::string_view or a std::string.
106
- if (std::holds_alternative<std::string_view>(unescaped))
107
- {
108
- return std::get<std::string_view>(unescaped);
109
- }
110
- else if (std::holds_alternative<std::string>(unescaped))
111
- {
112
- return std::get<std::string>(unescaped);
63
+ const_cast <ast_node*>(this )->unescaped = std::move (utf8);
64
+ result = std::get<std::string>(unescaped);
65
+ }
113
66
}
114
67
115
- throw parse_error ( " unexpected sub-string " , this -> begin ()) ;
68
+ return result ;
116
69
}
117
70
118
71
using namespace tao ::graphqlpeg;
@@ -165,20 +118,6 @@ struct ast_selector<float_value> : std::true_type
165
118
// Rendered diff of the ast_selector<escaped_unicode> specialization ('-' marks
// removed lines; bare numbers are gutter line numbers from the diff view).
// The removed transform() stored unescape::unhex_string<uint16_t> of the node's
// content, skipping its first character, into n->unescaped, and threw
// parse_error when the node had no content. With those lines removed the
// specialization body is empty and only the std::true_type base remains.
template <>
166
119
struct ast_selector <escaped_unicode> : std::true_type
167
120
{
168
- static void transform (std::unique_ptr<ast_node>& n)
169
- {
170
- if (n->has_content ())
171
- {
172
- auto content = n->string_view ();
173
-
174
- n->unescaped = unescape::unhex_string<uint16_t >(content.data () + 1 ,
175
- content.data () + content.size ());
176
-
177
- return ;
178
- }
179
-
180
- throw parse_error (" invalid escaped unicode code point" , n->begin ());
181
- }
182
121
};
183
122
184
123
template <>
@@ -263,34 +202,6 @@ struct ast_selector<block_quote_character> : std::true_type
263
202
// Rendered diff of the ast_selector<string_value> specialization ('-' marks
// removed lines; bare numbers are gutter line numbers from the diff view).
// The removed transform() left a node without children untouched; with exactly
// one child it moved that child's `unescaped` into the node; with several
// children it built a std::list<ast_node::string_or_utf16> holding, per child,
// either the child's std::uint16_t value or its unescaped_view(), and stored
// that list in n->unescaped. With those lines removed the specialization body
// is empty and only the std::true_type base remains.
template <>
264
203
struct ast_selector <string_value> : std::true_type
265
204
{
266
- static void transform (std::unique_ptr<ast_node>& n)
267
- {
268
- if (!n->children .empty ())
269
- {
270
- if (n->children .size () > 1 )
271
- {
272
- std::list<ast_node::string_or_utf16> unescaped;
273
-
274
- std::transform (n->children .cbegin (),
275
- n->children .cend (),
276
- std::back_inserter (unescaped),
277
- [](const auto & child) -> ast_node::string_or_utf16 {
278
- if (std::holds_alternative<std::uint16_t >(child->unescaped ))
279
- {
280
- return { std::get<std::uint16_t >(child->unescaped ) };
281
- }
282
-
283
- return { child->unescaped_view () };
284
- });
285
-
286
- n->unescaped = std::move (unescaped);
287
- }
288
- else
289
- {
290
- n->unescaped = std::move (n->children .front ()->unescaped );
291
- }
292
- }
293
- }
294
205
};
295
206
296
207
template <>
0 commit comments