Skip to content

Commit ef56381

Browse files
committed
parser: Add better restrictions around semicolons in statements
When parsing macro invocations, rustc does not actually consume the statement's trailing semicolon. Let's take the following example: ```rust macro_rules! one_stmt { ($s:stmt) => {}; } macro_rules! one_or_more_stmt { ($($s:stmt)*) => {}; } one_stmt!(let a = 1); one_stmt!(let b = 2;); // error one_or_more_stmt!(;); // valid one_or_more_stmt!(let a = 15;); // valid, two statements! one_or_more_stmt!(let a = 15 let b = 13); // valid, two statements again ``` A semicolon can count as a valid empty statement, but cannot be part of a statement (in macro invocations). This commit adds more restrictions that allow the parser to not always expect a semicolon token after the statement. Furthermore, this fixes a test that was previously accepted by the compiler but not by rustc.
1 parent cc6e405 commit ef56381

File tree

5 files changed

+53
-34
lines changed

5 files changed

+53
-34
lines changed

gcc/rust/expand/rust-macro-expand.cc

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -477,9 +477,12 @@ MacroExpander::match_fragment (Parser<MacroInvocLexer> &parser,
477477
parser.parse_visibility ();
478478
break;
479479

480-
case AST::MacroFragSpec::STMT:
481-
parser.parse_stmt (/* allow_no_semi */ true);
482-
break;
480+
case AST::MacroFragSpec::STMT: {
481+
auto restrictions = ParseRestrictions ();
482+
restrictions.consume_semi = false;
483+
parser.parse_stmt (restrictions);
484+
break;
485+
}
483486

484487
case AST::MacroFragSpec::LIFETIME:
485488
parser.parse_lifetime_params ();
@@ -887,11 +890,14 @@ transcribe_many_trait_impl_items (Parser<MacroInvocLexer> &parser,
887890
static std::vector<AST::SingleASTNode>
888891
transcribe_many_stmts (Parser<MacroInvocLexer> &parser, TokenId &delimiter)
889892
{
893+
auto restrictions = ParseRestrictions ();
894+
restrictions.consume_semi = false;
895+
890896
// FIXME: This is invalid! It needs to also handle cases where the macro
891897
// transcriber is an expression, but since the macro call is followed by
892898
// a semicolon, it's a valid ExprStmt
893-
return parse_many (parser, delimiter, [&parser] () {
894-
auto stmt = parser.parse_stmt (/* allow_no_semi */ true);
899+
return parse_many (parser, delimiter, [&parser, restrictions] () {
900+
auto stmt = parser.parse_stmt (restrictions);
895901
return AST::SingleASTNode (std::move (stmt));
896902
});
897903
}

gcc/rust/parse/rust-parse-impl.h

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6091,7 +6091,7 @@ Parser<ManagedTokenSource>::parse_named_function_param (
60916091
// Parses a statement (will further disambiguate any statement).
60926092
template <typename ManagedTokenSource>
60936093
std::unique_ptr<AST::Stmt>
6094-
Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
6094+
Parser<ManagedTokenSource>::parse_stmt (ParseRestrictions restrictions)
60956095
{
60966096
// quick exit for empty statement
60976097
// FIXME: Can we have empty statements without semicolons? Just nothing?
@@ -6116,7 +6116,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
61166116
{
61176117
case LET:
61186118
// let statement
6119-
return parse_let_stmt (std::move (outer_attrs), allow_no_semi);
6119+
return parse_let_stmt (std::move (outer_attrs), restrictions);
61206120
case PUB:
61216121
case MOD:
61226122
case EXTERN_TOK:
@@ -6171,7 +6171,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
61716171
// TODO: find out how to disable gcc "implicit fallthrough" warning
61726172
default:
61736173
// fallback: expression statement
6174-
return parse_expr_stmt (std::move (outer_attrs), allow_no_semi);
6174+
return parse_expr_stmt (std::move (outer_attrs), restrictions);
61756175
break;
61766176
}
61776177
}
@@ -6180,7 +6180,7 @@ Parser<ManagedTokenSource>::parse_stmt (bool allow_no_semi)
61806180
template <typename ManagedTokenSource>
61816181
std::unique_ptr<AST::LetStmt>
61826182
Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
6183-
bool allow_no_semi)
6183+
ParseRestrictions restrictions)
61846184
{
61856185
Location locus = lexer.peek_token ()->get_locus ();
61866186
skip_token (LET);
@@ -6235,13 +6235,9 @@ Parser<ManagedTokenSource>::parse_let_stmt (AST::AttrVec outer_attrs,
62356235
}
62366236
}
62376237

6238-
if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
6239-
{
6240-
// skip after somewhere
6238+
if (restrictions.consume_semi)
6239+
if (!skip_token (SEMICOLON))
62416240
return nullptr;
6242-
/* TODO: how wise is it to ditch a mostly-valid let statement just
6243-
* because a semicolon is missing? */
6244-
}
62456241

62466242
return std::unique_ptr<AST::LetStmt> (
62476243
new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type),
@@ -7076,7 +7072,7 @@ Parser<ManagedTokenSource>::parse_method ()
70767072
template <typename ManagedTokenSource>
70777073
std::unique_ptr<AST::ExprStmt>
70787074
Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
7079-
bool allow_no_semi)
7075+
ParseRestrictions restrictions)
70807076
{
70817077
/* potential thoughts - define new virtual method "has_block()" on expr. parse
70827078
* expr and then determine whether semicolon is needed as a result of this
@@ -7116,7 +7112,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
71167112
else
71177113
{
71187114
return parse_expr_stmt_without_block (std::move (outer_attrs),
7119-
allow_no_semi);
7115+
restrictions);
71207116
}
71217117
}
71227118
case UNSAFE: {
@@ -7130,7 +7126,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
71307126
else
71317127
{
71327128
return parse_expr_stmt_without_block (std::move (outer_attrs),
7133-
allow_no_semi);
7129+
restrictions);
71347130
}
71357131
}
71367132
default:
@@ -7139,7 +7135,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt (AST::AttrVec outer_attrs,
71397135
* initial tokens in order to prevent more syntactical errors at parse
71407136
* time. */
71417137
return parse_expr_stmt_without_block (std::move (outer_attrs),
7142-
allow_no_semi);
7138+
restrictions);
71437139
}
71447140
}
71457141

@@ -7255,7 +7251,7 @@ Parser<ManagedTokenSource>::parse_expr_stmt_with_block (
72557251
template <typename ManagedTokenSource>
72567252
std::unique_ptr<AST::ExprStmtWithoutBlock>
72577253
Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
7258-
AST::AttrVec outer_attrs, bool allow_no_semi)
7254+
AST::AttrVec outer_attrs, ParseRestrictions restrictions)
72597255
{
72607256
/* TODO: maybe move more logic for expr without block in here for better error
72617257
* handling */
@@ -7264,7 +7260,6 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
72647260
std::unique_ptr<AST::ExprWithoutBlock> expr = nullptr;
72657261
Location locus = lexer.peek_token ()->get_locus ();
72667262

7267-
auto restrictions = ParseRestrictions ();
72687263
restrictions.expr_can_be_stmt = true;
72697264

72707265
expr = parse_expr_without_block (std::move (outer_attrs), restrictions);
@@ -7279,12 +7274,9 @@ Parser<ManagedTokenSource>::parse_expr_stmt_without_block (
72797274
return nullptr;
72807275
}
72817276

7282-
// skip semicolon at end that is required
7283-
if (!maybe_skip_token (SEMICOLON) && !allow_no_semi)
7284-
{
7285-
// skip somewhere?
7277+
if (restrictions.consume_semi)
7278+
if (!skip_token (SEMICOLON))
72867279
return nullptr;
7287-
}
72887280

72897281
return std::unique_ptr<AST::ExprStmtWithoutBlock> (
72907282
new AST::ExprStmtWithoutBlock (std::move (expr), locus));

gcc/rust/parse/rust-parse.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ struct ParseRestrictions
8181
bool entered_from_unary = false;
8282
bool expr_can_be_null = false;
8383
bool expr_can_be_stmt = false;
84+
bool consume_semi = true;
8485
};
8586

8687
// Parser implementation for gccrs.
@@ -129,11 +130,9 @@ template <typename ManagedTokenSource> class Parser
129130
* | LetStatement
130131
* | ExpressionStatement
131132
* | MacroInvocationSemi
132-
*
133-
* @param allow_no_semi Allow the parser to not parse a semicolon after
134-
* the statement without erroring out
135133
*/
136-
std::unique_ptr<AST::Stmt> parse_stmt (bool allow_no_semi = false);
134+
std::unique_ptr<AST::Stmt> parse_stmt (ParseRestrictions restrictions
135+
= ParseRestrictions ());
137136
std::unique_ptr<AST::Type> parse_type ();
138137
std::unique_ptr<AST::ExternalItem> parse_external_item ();
139138
std::unique_ptr<AST::TraitItem> parse_trait_item ();
@@ -616,14 +615,17 @@ template <typename ManagedTokenSource> class Parser
616615
* semicolon to follow it
617616
*/
618617
std::unique_ptr<AST::LetStmt> parse_let_stmt (AST::AttrVec outer_attrs,
619-
bool allow_no_semi = false);
618+
ParseRestrictions restrictions
619+
= ParseRestrictions ());
620620
std::unique_ptr<AST::ExprStmt> parse_expr_stmt (AST::AttrVec outer_attrs,
621-
bool allow_no_semi = false);
621+
ParseRestrictions restrictions
622+
= ParseRestrictions ());
622623
std::unique_ptr<AST::ExprStmtWithBlock>
623624
parse_expr_stmt_with_block (AST::AttrVec outer_attrs);
624625
std::unique_ptr<AST::ExprStmtWithoutBlock>
625626
parse_expr_stmt_without_block (AST::AttrVec outer_attrs,
626-
bool allow_no_semi = false);
627+
ParseRestrictions restrictions
628+
= ParseRestrictions ());
627629
ExprOrStmt parse_stmt_or_expr_without_block ();
628630
ExprOrStmt parse_stmt_or_expr_with_block (AST::AttrVec outer_attrs);
629631
ExprOrStmt parse_macro_invocation_maybe_semi (AST::AttrVec outer_attrs);

gcc/testsuite/rust/compile/macro18.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ macro_rules! take_stmt {
77
}
88

99
fn main() -> i32 {
10-
take_stmt!(let complete = 15;);
10+
take_stmt!(let complete = 15;); // { dg-error "Failed to match any rule within macro" }
1111
take_stmt!(let lacking = 14);
1212

1313
0

gcc/testsuite/rust/compile/macro32.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
macro_rules! s {
2+
($s:stmt) => {{}};
3+
}
4+
5+
macro_rules! multi_s {
6+
($($s:stmt)+) => {{}};
7+
}
8+
9+
fn main() -> i32 {
10+
s!(let a = 15);
11+
s!(;); // Empty statement
12+
s!(let a = 15;); // { dg-error "Failed to match any rule within macro" }
13+
multi_s!(let a = 15;);
14+
// ^ this actually gets parsed as two statements - one LetStmt and one
15+
// empty statement. This is the same behavior as rustc, which you can
16+
// see using a count!() macro
17+
18+
32
19+
}

0 commit comments

Comments
 (0)