Skip to content

Commit f4ba64e

Browse files
bors[bot]matklad
andauthored
Merge #10623
10623: internal: replace L_DOLLAR/R_DOLLAR with parenthesis hack r=matklad a=matklad The general problem we are dealing with here is this: ``` macro_rules! thrice { ($e:expr) => { $e * 3} } fn main() { let x = thrice!(1 + 2); } ``` we really want this to print 9 rather than 7. The way rustc solves this is rather ad-hoc. In rustc, token trees are allowed to include whole AST fragments, so 1+2 is passed through macro expansion as a single unit. This is a significant violation of token tree model. In rust-analyzer, we intended to handle this in a more elegant way, using token trees with "invisible" delimiters. The idea was is that we introduce a new kind of parenthesis, "left $"/"right $", and let the parser intelligently handle this. The idea was inspired by the relevant comment in the proc_macro crate: https://doc.rust-lang.org/stable/proc_macro/enum.Delimiter.html#variant.None > An implicit delimiter, that may, for example, appear around tokens > coming from a “macro variable” $var. It is important to preserve > operator priorities in cases like $var * 3 where $var is 1 + 2. > Implicit delimiters might not survive roundtrip of a token stream > through a string. Now that we are older and wiser, we conclude that the idea doesn't work. _First_, the comment in the proc-macro crate is wishful thinking. Rustc currently completely ignores none delimiters. It solves the (1 + 2) * 3 problem by having magical token trees which can't be duplicated: * https://rust-lang.zulipchat.com/#narrow/stream/185405-t-compiler.2Frust-analyzer/topic/TIL.20that.20token.20streams.20are.20magic * https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/Handling.20of.20Delimiter.3A.3ANone.20by.20the.20parser _Second_, it's not like our implementation in rust-analyzer works. We special-case expressions (as opposed to treating all kinds of $var captures the same) and we don't know how parser error recovery should work with these dollar-parenthesis. So, in this PR we simplify the whole thing away by not pretending that we are doing something proper and instead just explicitly special-casing expressions by wrapping them into real `()`. In the future, to maintain bug-parity with `rustc` what we are going to do is probably adding an explicit `CAPTURED_EXPR` *token* which we can explicitly account for in the parser. If/when rustc starts handling delimiter=none properly, we'll port that logic as well, in addition to special handling. Co-authored-by: Aleksey Kladov <aleksey.kladov@gmail.com>
2 parents 210a1d5 + 485c5e6 commit f4ba64e

File tree

12 files changed

+170
-183
lines changed

12 files changed

+170
-183
lines changed

crates/hir_def/src/macro_expansion_tests/mbe.rs

Lines changed: 51 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -317,32 +317,35 @@ macro_rules! m {
317317
($ i:expr) => { fn bar() { $ i * 3; } }
318318
}
319319
fn bar() {
320-
1+2*3;
320+
(1+2)*3;
321321
}
322-
// MACRO_ITEMS@0..15
323-
// FN@0..15
322+
// MACRO_ITEMS@0..17
323+
// FN@0..17
324324
// FN_KW@0..2 "fn"
325325
// NAME@2..5
326326
// IDENT@2..5 "bar"
327327
// PARAM_LIST@5..7
328328
// L_PAREN@5..6 "("
329329
// R_PAREN@6..7 ")"
330-
// BLOCK_EXPR@7..15
331-
// STMT_LIST@7..15
330+
// BLOCK_EXPR@7..17
331+
// STMT_LIST@7..17
332332
// L_CURLY@7..8 "{"
333-
// EXPR_STMT@8..14
334-
// BIN_EXPR@8..13
335-
// BIN_EXPR@8..11
336-
// LITERAL@8..9
337-
// INT_NUMBER@8..9 "1"
338-
// PLUS@9..10 "+"
339-
// LITERAL@10..11
340-
// INT_NUMBER@10..11 "2"
341-
// STAR@11..12 "*"
342-
// LITERAL@12..13
343-
// INT_NUMBER@12..13 "3"
344-
// SEMICOLON@13..14 ";"
345-
// R_CURLY@14..15 "}"
333+
// EXPR_STMT@8..16
334+
// BIN_EXPR@8..15
335+
// PAREN_EXPR@8..13
336+
// L_PAREN@8..9 "("
337+
// BIN_EXPR@9..12
338+
// LITERAL@9..10
339+
// INT_NUMBER@9..10 "1"
340+
// PLUS@10..11 "+"
341+
// LITERAL@11..12
342+
// INT_NUMBER@11..12 "2"
343+
// R_PAREN@12..13 ")"
344+
// STAR@13..14 "*"
345+
// LITERAL@14..15
346+
// INT_NUMBER@14..15 "3"
347+
// SEMICOLON@15..16 ";"
348+
// R_CURLY@16..17 "}"
346349
347350
"#]],
348351
)
@@ -722,7 +725,7 @@ macro_rules! m {
722725
}
723726
724727
fn bar() {
725-
2+2*baz(3).quux();
728+
(2+2*baz(3).quux());
726729
}
727730
"#]],
728731
)
@@ -1370,42 +1373,48 @@ macro_rules! m {
13701373
}
13711374
/* parse error: expected identifier */
13721375
/* parse error: expected SEMICOLON */
1376+
/* parse error: expected SEMICOLON */
1377+
/* parse error: expected expression */
13731378
fn f() {
1374-
K::C("0");
1379+
K::(C("0"));
13751380
}
1376-
// MACRO_ITEMS@0..17
1377-
// FN@0..17
1381+
// MACRO_ITEMS@0..19
1382+
// FN@0..19
13781383
// FN_KW@0..2 "fn"
13791384
// NAME@2..3
13801385
// IDENT@2..3 "f"
13811386
// PARAM_LIST@3..5
13821387
// L_PAREN@3..4 "("
13831388
// R_PAREN@4..5 ")"
1384-
// BLOCK_EXPR@5..17
1385-
// STMT_LIST@5..17
1389+
// BLOCK_EXPR@5..19
1390+
// STMT_LIST@5..19
13861391
// L_CURLY@5..6 "{"
1387-
// EXPR_STMT@6..9
1388-
// PATH_EXPR@6..9
1389-
// PATH@6..9
1392+
// EXPR_STMT@6..10
1393+
// PATH_EXPR@6..10
1394+
// PATH@6..10
13901395
// PATH@6..7
13911396
// PATH_SEGMENT@6..7
13921397
// NAME_REF@6..7
13931398
// IDENT@6..7 "K"
13941399
// COLON2@7..9 "::"
1395-
// EXPR_STMT@9..16
1396-
// CALL_EXPR@9..15
1397-
// PATH_EXPR@9..10
1398-
// PATH@9..10
1399-
// PATH_SEGMENT@9..10
1400-
// NAME_REF@9..10
1401-
// IDENT@9..10 "C"
1402-
// ARG_LIST@10..15
1403-
// L_PAREN@10..11 "("
1404-
// LITERAL@11..14
1405-
// STRING@11..14 "\"0\""
1406-
// R_PAREN@14..15 ")"
1407-
// SEMICOLON@15..16 ";"
1408-
// R_CURLY@16..17 "}"
1400+
// ERROR@9..10
1401+
// L_PAREN@9..10 "("
1402+
// EXPR_STMT@10..16
1403+
// CALL_EXPR@10..16
1404+
// PATH_EXPR@10..11
1405+
// PATH@10..11
1406+
// PATH_SEGMENT@10..11
1407+
// NAME_REF@10..11
1408+
// IDENT@10..11 "C"
1409+
// ARG_LIST@11..16
1410+
// L_PAREN@11..12 "("
1411+
// LITERAL@12..15
1412+
// STRING@12..15 "\"0\""
1413+
// R_PAREN@15..16 ")"
1414+
// ERROR@16..17
1415+
// R_PAREN@16..17 ")"
1416+
// SEMICOLON@17..18 ";"
1417+
// R_CURLY@18..19 "}"
14091418
14101419
"#]],
14111420
);
@@ -1441,7 +1450,7 @@ fn f() {
14411450
expect![[r#"
14421451
macro_rules! m { ($expr:expr) => { map($expr) } }
14431452
fn f() {
1444-
let _ = map(x+foo);
1453+
let _ = map((x+foo));
14451454
}
14461455
"#]],
14471456
)

crates/hir_def/src/macro_expansion_tests/mbe/regression.rs

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -825,15 +825,18 @@ macro_rules! rgb_color {
825825
};
826826
}
827827
/* parse error: expected type */
828+
/* parse error: expected R_PAREN */
828829
/* parse error: expected R_ANGLE */
829830
/* parse error: expected COMMA */
830831
/* parse error: expected R_ANGLE */
831832
/* parse error: expected SEMICOLON */
833+
/* parse error: expected SEMICOLON */
834+
/* parse error: expected expression */
832835
pub fn new() {
833-
let _ = 0as u32<<8+8;
836+
let _ = 0as u32<<(8+8);
834837
}
835-
// MACRO_ITEMS@0..29
836-
// FN@0..29
838+
// MACRO_ITEMS@0..31
839+
// FN@0..31
837840
// VISIBILITY@0..3
838841
// PUB_KW@0..3 "pub"
839842
// FN_KW@3..5 "fn"
@@ -842,39 +845,45 @@ pub fn new() {
842845
// PARAM_LIST@8..10
843846
// L_PAREN@8..9 "("
844847
// R_PAREN@9..10 ")"
845-
// BLOCK_EXPR@10..29
846-
// STMT_LIST@10..29
848+
// BLOCK_EXPR@10..31
849+
// STMT_LIST@10..31
847850
// L_CURLY@10..11 "{"
848-
// LET_STMT@11..24
851+
// LET_STMT@11..27
849852
// LET_KW@11..14 "let"
850853
// WILDCARD_PAT@14..15
851854
// UNDERSCORE@14..15 "_"
852855
// EQ@15..16 "="
853-
// CAST_EXPR@16..24
856+
// CAST_EXPR@16..27
854857
// LITERAL@16..17
855858
// INT_NUMBER@16..17 "0"
856859
// AS_KW@17..19 "as"
857-
// PATH_TYPE@19..24
858-
// PATH@19..24
859-
// PATH_SEGMENT@19..24
860+
// PATH_TYPE@19..27
861+
// PATH@19..27
862+
// PATH_SEGMENT@19..27
860863
// NAME_REF@19..22
861864
// IDENT@19..22 "u32"
862-
// GENERIC_ARG_LIST@22..24
865+
// GENERIC_ARG_LIST@22..27
863866
// L_ANGLE@22..23 "<"
864-
// TYPE_ARG@23..24
865-
// PATH_TYPE@23..24
866-
// PATH@23..24
867-
// PATH_SEGMENT@23..24
868-
// L_ANGLE@23..24 "<"
869-
// EXPR_STMT@24..28
870-
// BIN_EXPR@24..27
871-
// LITERAL@24..25
872-
// INT_NUMBER@24..25 "8"
873-
// PLUS@25..26 "+"
874-
// LITERAL@26..27
875-
// INT_NUMBER@26..27 "8"
876-
// SEMICOLON@27..28 ";"
877-
// R_CURLY@28..29 "}"
867+
// TYPE_ARG@23..27
868+
// DYN_TRAIT_TYPE@23..27
869+
// TYPE_BOUND_LIST@23..27
870+
// TYPE_BOUND@23..26
871+
// PATH_TYPE@23..26
872+
// PATH@23..26
873+
// PATH_SEGMENT@23..26
874+
// L_ANGLE@23..24 "<"
875+
// PAREN_TYPE@24..26
876+
// L_PAREN@24..25 "("
877+
// ERROR@25..26
878+
// INT_NUMBER@25..26 "8"
879+
// PLUS@26..27 "+"
880+
// EXPR_STMT@27..28
881+
// LITERAL@27..28
882+
// INT_NUMBER@27..28 "8"
883+
// ERROR@28..29
884+
// R_PAREN@28..29 ")"
885+
// SEMICOLON@29..30 ";"
886+
// R_CURLY@30..31 "}"
878887
879888
"#]],
880889
);

crates/mbe/src/expander.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,12 @@ enum Binding {
110110
enum Fragment {
111111
/// token fragments are just copy-pasted into the output
112112
Tokens(tt::TokenTree),
113-
/// Ast fragments are inserted with fake delimiters, so as to make things
114-
/// like `$i * 2` where `$i = 1 + 1` work as expectd.
115-
Ast(tt::TokenTree),
113+
/// Expr ast fragments are surrounded with `()` on insertion to preserve
114+
/// precedence. Note that this impl is different from the one currently in
115+
/// `rustc` -- `rustc` doesn't translate fragments into token trees at all.
116+
///
117+
/// At one point in time, we tried to to use "fake" delimiters here a-la
118+
/// proc-macro delimiter=none. As we later discovered, "none" delimiters are
119+
/// tricky to handle in the parser, and rustc doesn't handle those either.
120+
Expr(tt::TokenTree),
116121
}

crates/mbe/src/expander/matcher.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragmen
736736
}
737737
};
738738
let result = input.expect_fragment(fragment);
739-
result.map(|tt| if kind == "expr" { tt.map(Fragment::Ast) } else { tt.map(Fragment::Tokens) })
739+
result.map(|tt| if kind == "expr" { tt.map(Fragment::Expr) } else { tt.map(Fragment::Tokens) })
740740
}
741741

742742
fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) {

crates/mbe/src/expander/transcriber.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,16 @@ fn expand_repeat(
238238
fn push_fragment(buf: &mut Vec<tt::TokenTree>, fragment: Fragment) {
239239
match fragment {
240240
Fragment::Tokens(tt::TokenTree::Subtree(tt)) => push_subtree(buf, tt),
241-
Fragment::Tokens(tt) | Fragment::Ast(tt) => buf.push(tt),
241+
Fragment::Expr(tt::TokenTree::Subtree(mut tt)) => {
242+
if tt.delimiter.is_none() {
243+
tt.delimiter = Some(tt::Delimiter {
244+
id: tt::TokenId::unspecified(),
245+
kind: tt::DelimiterKind::Parenthesis,
246+
})
247+
}
248+
buf.push(tt.into())
249+
}
250+
Fragment::Tokens(tt) | Fragment::Expr(tt) => buf.push(tt),
242251
}
243252
}
244253

crates/mbe/src/subtree_source.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,20 @@ impl<'a> SubtreeTokenSource {
5252
cursor.bump()
5353
}
5454
Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
55-
cached.push(convert_delim(subtree.delimiter_kind(), false));
55+
if let Some(d) = subtree.delimiter_kind() {
56+
cached.push(convert_delim(d, false));
57+
}
5658
cursor.subtree().unwrap()
5759
}
58-
None => {
59-
if let Some(subtree) = cursor.end() {
60-
cached.push(convert_delim(subtree.delimiter_kind(), true));
60+
None => match cursor.end() {
61+
Some(subtree) => {
62+
if let Some(d) = subtree.delimiter_kind() {
63+
cached.push(convert_delim(d, true));
64+
}
6165
cursor.bump()
62-
} else {
63-
continue;
6466
}
65-
}
67+
None => continue,
68+
},
6669
};
6770
}
6871

@@ -109,17 +112,16 @@ impl<'a> TokenSource for SubtreeTokenSource {
109112
}
110113
}
111114

112-
fn convert_delim(d: Option<tt::DelimiterKind>, closing: bool) -> TtToken {
115+
fn convert_delim(d: tt::DelimiterKind, closing: bool) -> TtToken {
113116
let (kinds, texts) = match d {
114-
Some(tt::DelimiterKind::Parenthesis) => ([T!['('], T![')']], "()"),
115-
Some(tt::DelimiterKind::Brace) => ([T!['{'], T!['}']], "{}"),
116-
Some(tt::DelimiterKind::Bracket) => ([T!['['], T![']']], "[]"),
117-
None => ([L_DOLLAR, R_DOLLAR], ""),
117+
tt::DelimiterKind::Parenthesis => ([T!['('], T![')']], "()"),
118+
tt::DelimiterKind::Brace => ([T!['{'], T!['}']], "{}"),
119+
tt::DelimiterKind::Bracket => ([T!['['], T![']']], "[]"),
118120
};
119121

120122
let idx = closing as usize;
121123
let kind = kinds[idx];
122-
let text = if !texts.is_empty() { &texts[idx..texts.len() - (1 - idx)] } else { "" };
124+
let text = &texts[idx..texts.len() - (1 - idx)];
123125
TtToken { tt: Token { kind, is_jointed_to_next: false }, text: SmolStr::new(text) }
124126
}
125127

0 commit comments

Comments
 (0)