Skip to content

Commit b253d9c

Browse files
Add unicode symbols parser (#213)
* Add symb parser to handle unicode symbols
* Add documentation for symb
* Add tests for symb
* Fix typo in the documentation

---------

Contributed by: Antoine Fontaine <antoinefontaine@posteo.net>
1 parent 0a34acc commit b253d9c

File tree

9 files changed

+1203
-1
lines changed

9 files changed

+1203
-1
lines changed

doc/parser.qbk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@
218218
[def _control_ [globalref boost::parser::control `control`]]
219219
[def _digit_ [globalref boost::parser::digit `digit`]]
220220
[def _punct_ [globalref boost::parser::punct `punct`]]
221+
[def _symb_ [globalref boost::parser::symb `symb`]]
221222
[def _hex_digit_ [globalref boost::parser::hex_digit `hex_digit`]]
222223
[def _lower_ [globalref boost::parser::lower `lower`]]
223224
[def _upper_ [globalref boost::parser::upper `upper`]]

doc/tables.qbk

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ the input they match unless otherwise stated in the table below.]
132132
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
133133
[]]
134134

135+
[[ `_symb_` ]
136+
[ Matches a single symbol code point. ]
137+
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]
138+
[]]
139+
135140
[[ `_hex_digit_` ]
136141
[ Matches a single hexadecimal digit code point. ]
137142
[ The code point type in Unicode parsing, or `char` in non-Unicode parsing. See the entry for _ch_. ]

include/boost/parser/detail/printing.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,13 @@ namespace boost { namespace parser { namespace detail {
245245
std::ostream & os,
246246
int components = 0);
247247

248+
template<typename Context>
249+
void print_parser(
250+
Context const & context,
251+
char_set_parser<symb_chars> const & parser,
252+
std::ostream & os,
253+
int components = 0);
254+
248255
template<typename Context>
249256
void print_parser(
250257
Context const & context,

include/boost/parser/detail/printing_impl.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,16 @@ namespace boost { namespace parser { namespace detail {
636636
os << "punct";
637637
}
638638

639+
template<typename Context>
640+
void print_parser(
641+
Context const & context,
642+
char_set_parser<symb_chars> const & parser,
643+
std::ostream & os,
644+
int components)
645+
{
646+
os << "symb";
647+
}
648+
639649
template<typename Context>
640650
void print_parser(
641651
Context const & context,

include/boost/parser/detail/unicode_char_sets.hpp

Lines changed: 1157 additions & 0 deletions
Large diffs are not rendered by default.

include/boost/parser/parser.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7811,12 +7811,18 @@ namespace boost { namespace parser {
78117811
control;
78127812

78137813
/** The punctuation character parser. Matches the full set of Unicode
7814-
punctuation clases (specifically, "Pc", "Pd", "Pe", "Pf", "Pi", "Ps",
7814+
punctuation classes (specifically, "Pc", "Pd", "Pe", "Pf", "Pi", "Ps",
78157815
and "Po"). */
78167816
inline BOOST_PARSER_ALGO_CONSTEXPR
78177817
parser_interface<char_set_parser<detail::punct_chars>>
78187818
punct;
78197819

7820+
/** The symbol character parser. Matches the full set of Unicode
7821+
symbol classes (specifically, "Sc", "Sk", "Sm", and "So"). */
7822+
inline BOOST_PARSER_ALGO_CONSTEXPR
7823+
parser_interface<char_set_parser<detail::symb_chars>>
7824+
symb;
7825+
78207826
/** The lower case character parser. Matches the full set of Unicode
78217827
lower case code points (class "Ll"). */
78227828
inline BOOST_PARSER_ALGO_CONSTEXPR

include/boost/parser/parser_fwd.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ namespace boost { namespace parser {
143143

144144
struct punct_chars
145145
{};
146+
struct symb_chars
147+
{};
146148
struct lower_case_chars
147149
{};
148150
struct upper_case_chars

test/github_issues.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ void github_issue_209()
245245
std::begin(bp::detail::char_set<detail::punct_chars>::chars),
246246
std::end(bp::detail::char_set<detail::punct_chars>::chars)));
247247

248+
BOOST_TEST(std::is_sorted(
249+
std::begin(bp::detail::char_set<detail::symb_chars>::chars),
250+
std::end(bp::detail::char_set<detail::symb_chars>::chars)));
251+
248252
BOOST_TEST(std::is_sorted(
249253
std::begin(bp::detail::char_set<detail::lower_case_chars>::chars),
250254
std::end(bp::detail::char_set<detail::lower_case_chars>::chars)));

test/parser.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2753,6 +2753,16 @@ int main()
27532753
BOOST_TEST(result == std::vector<uint32_t>({0x21, 0xfda}));
27542754
}
27552755

2756+
// symb_
2757+
{
2758+
auto parser = +symb;
2759+
2760+
std::u32string str = U"$^\u20AC!\u2194\u220F\U0001D7C6b\u2280\U0001FACE\U0001039F";
2761+
std::vector<uint32_t> result;
2762+
BOOST_TEST(parse(str, parser, char_ - symb, result));
2763+
BOOST_TEST(result == std::vector<uint32_t>({U'$', U'^', 0x20AC, 0x2194, 0x220F, 0x2280, 0x1FACE}));
2764+
}
2765+
27562766
// lower_
27572767
{
27582768
auto parser = +lower;

0 commit comments

Comments
 (0)