Skip to content

Commit 2ea1950

Browse files
MaxSagebaum and hsutter
authored
Fix for hexadecimal escapes of the form \x{62}. (#1153)
* Fix for hexadecimal escapes of the form `\x{62}`. * Also support `\u{...}` for universal character names. Add an error message to the failure path. Add `//G` grammar comments. Run regression tests. --------- Co-authored-by: Herb Sutter <herb.sutter@gmail.com>
1 parent 9b7306f commit 2ea1950

File tree

1 file changed

+64
-7
lines changed

1 file changed

+64
-7
lines changed

source/lex.h

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -930,26 +930,51 @@ auto lex_line(
930930
return 0;
931931
};
932932

933+
//G simple-hexadecimal-digit-sequence:
934+
//G hexadecimal-digit
935+
//G simple-hexadecimal-digit-sequence hexadecimal-digit
936+
//G
933937
//G hexadecimal-escape-sequence:
934938
//G '\x' hexadecimal-digit
935939
//G hexadecimal-escape-sequence hexadecimal-digit
940+
//G '\x{' simple-hexadecimal-digit-sequence '}'
936941
//G
937942
auto peek_is_hexadecimal_escape_sequence = [&](int offset)
938943
{
939944
if (
940-
peek( offset) == '\\'
945+
peek(offset) == '\\'
941946
&& peek(1+offset) == 'x'
942-
&& is_hexadecimal_digit(peek(2+offset))
947+
&& (
948+
is_hexadecimal_digit(peek(2+offset))
949+
|| (peek(2+offset) == '{' && is_hexadecimal_digit(peek(3+offset)))
950+
)
943951
)
944952
{
953+
auto has_bracket = peek(2+offset) == '{';
945954
auto j = 3;
955+
956+
if (has_bracket) { ++j; }
957+
946958
while (
947959
peek(j+offset)
948960
&& is_hexadecimal_digit(peek(j+offset))
949961
)
950962
{
951963
++j;
952964
}
965+
966+
if (has_bracket) {
967+
if (peek(j+offset) == '}') {
968+
++j;
969+
} else {
970+
errors.emplace_back(
971+
source_position(lineno, i + offset),
972+
"invalid hexadecimal escape sequence - \\x{ must"
973+
" be followed by hexadecimal digits and a closing }"
974+
);
975+
return 0;
976+
}
977+
}
953978
return j;
954979
}
955980
return 0;
@@ -958,6 +983,7 @@ auto lex_line(
958983
//G universal-character-name:
959984
//G '\u' hex-quad
960985
//G '\U' hex-quad hex-quad
986+
//G '\u{' simple-hexadecimal-digit-sequence '}'
961987
//G
962988
//G hex-quad:
963989
//G hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit
@@ -967,6 +993,7 @@ auto lex_line(
967993
if (
968994
peek(offset) == '\\'
969995
&& peek(1 + offset) == 'u'
996+
&& peek(2 + offset) != '{'
970997
)
971998
{
972999
auto j = 2;
@@ -980,11 +1007,41 @@ auto lex_line(
9801007
if (j == 6) { return j; }
9811008
errors.emplace_back(
9821009
source_position( lineno, i + offset ),
983-
"invalid universal character name (\\u must"
984-
" be followed by 4 hexadecimal digits)"
1010+
"invalid universal character name - \\u without { must"
1011+
" be followed by 4 hexadecimal digits"
9851012
);
9861013
}
987-
if (
1014+
1015+
else if (
1016+
peek(offset) == '\\'
1017+
&& peek(1 + offset) == 'u'
1018+
&& peek(2 + offset) == '{'
1019+
)
1020+
{
1021+
auto j = 4;
1022+
1023+
while (
1024+
peek(j + offset)
1025+
&& is_hexadecimal_digit(peek(j + offset))
1026+
)
1027+
{
1028+
++j;
1029+
}
1030+
1031+
if (peek(j + offset) == '}') {
1032+
++j;
1033+
}
1034+
else {
1035+
errors.emplace_back(
1036+
source_position(lineno, i + offset),
1037+
"invalid universal character name - \\u{ must"
1038+
" be followed by hexadecimal digits and a closing }"
1039+
);
1040+
}
1041+
return j;
1042+
}
1043+
1044+
else if (
9881045
peek(offset) == '\\'
9891046
&& peek(1+offset) == 'U'
9901047
)
@@ -1000,8 +1057,8 @@ auto lex_line(
10001057
if (j == 10) { return j; }
10011058
errors.emplace_back(
10021059
source_position(lineno, i+offset),
1003-
"invalid universal character name (\\U must"
1004-
" be followed by 8 hexadecimal digits)"
1060+
"invalid universal character name - \\U must"
1061+
" be followed by 8 hexadecimal digits"
10051062
);
10061063
}
10071064
return 0;

0 commit comments

Comments
 (0)