
Commit e720646

Add parse_to_token_tree
1 parent 013e908

File tree

5 files changed: +213 additions, −37 deletions


crates/ra_hir_expand/src/builtin_macro.rs
Lines changed: 4 additions & 4 deletions

@@ -7,6 +7,7 @@ use crate::{
 
 use crate::{quote, EagerMacroId, LazyMacroId, MacroCallId};
 use either::Either;
+use mbe::parse_to_token_tree;
 use ra_db::{FileId, RelativePath};
 use ra_parser::FragmentKind;
 
@@ -306,10 +307,9 @@ fn include_expand(
 
     // FIXME:
     // Handle include as expression
-    let node =
-        db.parse_or_expand(file_id.into()).ok_or_else(|| mbe::ExpandError::ConversionError)?;
-    let res =
-        mbe::syntax_node_to_token_tree(&node).ok_or_else(|| mbe::ExpandError::ConversionError)?.0;
+    let res = parse_to_token_tree(&db.file_text(file_id.into()))
+        .ok_or_else(|| mbe::ExpandError::ConversionError)?
+        .0;
 
     Ok((res, FragmentKind::Items))
 }
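
Note: the new code lexes the included file's text straight into a token tree instead of parsing it to a `SyntaxNode` first. A minimal sketch of the intended equivalence (a hypothetical check, not part of this commit):

    // For well-formed input, the lexer-based conversion should agree with
    // the conversion that goes through a full syntax tree.
    let text = "fn main() {}";
    let direct = mbe::parse_to_token_tree(text).unwrap().0;
    let parse = ra_syntax::SourceFile::parse(text);
    let via_tree = mbe::syntax_node_to_token_tree(parse.tree().syntax()).unwrap().0;
    assert_eq!(direct, via_tree);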

crates/ra_mbe/src/lib.rs
Lines changed: 2 additions & 1 deletion

@@ -31,7 +31,8 @@ pub enum ExpandError {
 }
 
 pub use crate::syntax_bridge::{
-    ast_to_token_tree, syntax_node_to_token_tree, token_tree_to_syntax_node, TokenMap,
+    ast_to_token_tree, parse_to_token_tree, syntax_node_to_token_tree, token_tree_to_syntax_node,
+    TokenMap,
 };
 
 /// This struct contains AST for a single `macro_rules` definition. What might
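
This re-export makes the new conversion part of `ra_mbe`'s public API; it is what lets `builtin_macro.rs` above import it directly:

    use mbe::parse_to_token_tree;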

crates/ra_mbe/src/syntax_bridge.rs
Lines changed: 191 additions & 30 deletions

@@ -2,8 +2,10 @@
 
 use ra_parser::{FragmentKind, ParseError, TreeSink};
 use ra_syntax::{
-    ast, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind, SyntaxKind::*, SyntaxNode,
-    SyntaxTreeBuilder, TextRange, TextUnit, T,
+    ast::{self, make::tokens::doc_comment},
+    tokenize, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind,
+    SyntaxKind::*,
+    SyntaxNode, SyntaxTreeBuilder, TextRange, TextUnit, Token, T,
 };
 use rustc_hash::FxHashMap;
 use std::iter::successors;
@@ -48,9 +50,11 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenM
 /// will consume).
 pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> {
     let global_offset = node.text_range().start();
-    let mut c = Convertor { map: TokenMap::default(), global_offset, next_id: 0 };
+    let mut c = Convertor {
+        id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } },
+    };
     let subtree = c.go(node)?;
-    Some((subtree, c.map))
+    Some((subtree, c.id_alloc.map))
 }
 
 // The following items are what `rustc` macro can be parsed into :
@@ -89,6 +93,28 @@ pub fn token_tree_to_syntax_node(
     Ok((parse, range_map))
 }
 
+/// Convert a string to a `TokenTree`
+pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
+    let (tokens, errors) = tokenize(text);
+    if !errors.is_empty() {
+        return None;
+    }
+
+    let mut conv = RawConvertor {
+        text,
+        offset: TextUnit::default(),
+        inner: tokens.iter(),
+        id_alloc: TokenIdAlloc {
+            map: Default::default(),
+            global_offset: TextUnit::default(),
+            next_id: 0,
+        },
+    };
+
+    let subtree = conv.go()?;
+    Some((subtree, conv.id_alloc.map))
+}
+
 impl TokenMap {
     pub fn token_by_range(&self, relative_range: TextRange) -> Option<tt::TokenId> {
         let &(token_id, _) = self.entries.iter().find(|(_, range)| match range {
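
A minimal usage sketch for the new function (hypothetical, not part of the commit). The conversion drops trivia, so `struct Foo;` yields three token trees, and a lexer error makes the function return `None`:

    let (subtree, _map) = parse_to_token_tree("struct Foo;").unwrap();
    assert_eq!(subtree.token_trees.len(), 3); // `struct`, `Foo`, `;`

    // An unterminated string literal is a lexer error, so conversion fails.
    assert!(parse_to_token_tree(r#""unterminated"#).is_none());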
@@ -118,6 +144,14 @@ impl TokenMap {
         self.entries
             .push((token_id, TokenTextRange::Delimiter(open_relative_range, close_relative_range)));
     }
+
+    fn update_close_delim(&mut self, token_id: tt::TokenId, close_relative_range: TextRange) {
+        if let Some(entry) = self.entries.iter_mut().find(|(tid, _)| *tid == token_id) {
+            if let TokenTextRange::Delimiter(dim, _) = entry.1 {
+                entry.1 = TokenTextRange::Delimiter(dim, close_relative_range);
+            }
+        }
+    }
 }
 
 /// Returns the textual content of a doc comment block as a quoted string
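
`update_close_delim` supports the raw convertor's two-phase delimiter handling: when a group is opened, its close position is not yet known, so `open_delim` stores the open range as a placeholder for both ends, and `close_delim` patches in the real close range once the closing token is reached. A sketch of the sequence (hypothetical ranges, assuming an `id_alloc: TokenIdAlloc` in scope inside this module):

    // Input `(a)`: `(` spans 0..1, `)` spans 2..3.
    let id = id_alloc.open_delim(TextRange::offset_len(0.into(), 1.into())); // stored as (0..1, 0..1)
    // ... the leaves of the group are collected here ...
    id_alloc.close_delim(id, TextRange::offset_len(2.into(), 1.into())); // patched to (0..1, 2..3)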
@@ -188,12 +222,161 @@ fn convert_doc_comment(token: &ra_syntax::SyntaxToken) -> Option<Vec<tt::TokenTr
     }
 }
 
-struct Convertor {
+struct TokenIdAlloc {
     map: TokenMap,
     global_offset: TextUnit,
     next_id: u32,
 }
 
+impl TokenIdAlloc {
+    fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
+        let relative_range = absolute_range - self.global_offset;
+        let token_id = tt::TokenId(self.next_id);
+        self.next_id += 1;
+        self.map.insert(token_id, relative_range);
+        token_id
+    }
+
+    fn delim(&mut self, open_abs_range: TextRange, close_abs_range: TextRange) -> tt::TokenId {
+        let open_relative_range = open_abs_range - self.global_offset;
+        let close_relative_range = close_abs_range - self.global_offset;
+        let token_id = tt::TokenId(self.next_id);
+        self.next_id += 1;
+
+        self.map.insert_delim(token_id, open_relative_range, close_relative_range);
+        token_id
+    }
+
+    fn open_delim(&mut self, open_abs_range: TextRange) -> tt::TokenId {
+        let token_id = tt::TokenId(self.next_id);
+        self.next_id += 1;
+        self.map.insert_delim(token_id, open_abs_range, open_abs_range);
+        token_id
+    }
+
+    fn close_delim(&mut self, id: tt::TokenId, close_abs_range: TextRange) {
+        self.map.update_close_delim(id, close_abs_range);
+    }
+}
+
+/// A raw-token (straight from the lexer) convertor
+struct RawConvertor<'a> {
+    text: &'a str,
+    offset: TextUnit,
+    id_alloc: TokenIdAlloc,
+    inner: std::slice::Iter<'a, Token>,
+}
+
+impl RawConvertor<'_> {
+    fn go(&mut self) -> Option<tt::Subtree> {
+        let mut subtree = tt::Subtree::default();
+        subtree.delimiter = None;
+        while self.peek().is_some() {
+            self.collect_leaf(&mut subtree.token_trees);
+        }
+        if subtree.token_trees.is_empty() {
+            return None;
+        }
+        if subtree.token_trees.len() == 1 {
+            if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] {
+                return Some(first.clone());
+            }
+        }
+        Some(subtree)
+    }
+
+    fn bump(&mut self) -> Option<(Token, TextRange)> {
+        let token = self.inner.next()?;
+        let range = TextRange::offset_len(self.offset, token.len);
+        self.offset += token.len;
+        Some((*token, range))
+    }
+
+    fn peek(&self) -> Option<Token> {
+        self.inner.as_slice().get(0).cloned()
+    }
+
+    fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) {
+        let (token, range) = match self.bump() {
+            None => return,
+            Some(it) => it,
+        };
+
+        let k: SyntaxKind = token.kind;
+        if k == COMMENT {
+            let node = doc_comment(&self.text[range]);
+            if let Some(tokens) = convert_doc_comment(&node) {
+                result.extend(tokens);
+            }
+            return;
+        }
+
+        result.push(if k.is_punct() {
+            let delim = match k {
+                T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])),
+                T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])),
+                T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])),
+                _ => None,
+            };
+
+            if let Some((kind, closed)) = delim {
+                let mut subtree = tt::Subtree::default();
+                let id = self.id_alloc.open_delim(range);
+                subtree.delimiter = Some(tt::Delimiter { kind, id });
+
+                while self.peek().map(|it| it.kind != closed).unwrap_or(false) {
+                    self.collect_leaf(&mut subtree.token_trees);
+                }
+                let last_range = match self.bump() {
+                    None => return,
+                    Some(it) => it.1,
+                };
+                self.id_alloc.close_delim(id, last_range);
+                subtree.into()
+            } else {
+                let spacing = match self.peek() {
+                    Some(next)
+                        if next.kind.is_trivia()
+                            || next.kind == T!['[']
+                            || next.kind == T!['{']
+                            || next.kind == T!['('] =>
+                    {
+                        tt::Spacing::Alone
+                    }
+                    Some(next) if next.kind.is_punct() => tt::Spacing::Joint,
+                    _ => tt::Spacing::Alone,
+                };
+                let char =
+                    self.text[range].chars().next().expect("Token from lexer must be single char");
+
+                tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc.alloc(range) }).into()
+            }
+        } else {
+            macro_rules! make_leaf {
+                ($i:ident) => {
+                    tt::$i { id: self.id_alloc.alloc(range), text: self.text[range].into() }.into()
+                };
+            }
+            let leaf: tt::Leaf = match k {
+                T![true] | T![false] => make_leaf!(Literal),
+                IDENT | LIFETIME => make_leaf!(Ident),
+                k if k.is_keyword() => make_leaf!(Ident),
+                k if k.is_literal() => make_leaf!(Literal),
+                _ => return,
+            };
+
+            leaf.into()
+        });
+    }
+}
+
+// FIXME: There is some duplicated logic between RawConvertor and Convertor.
+// It would be nice to refactor to convert SyntaxNode to ra_parser::Token and thus
+// use RawConvertor directly, but performance-wise it may not be a good idea.
+struct Convertor {
+    id_alloc: TokenIdAlloc,
+}
+
 impl Convertor {
     fn go(&mut self, tt: &SyntaxNode) -> Option<tt::Subtree> {
         // This tree is empty
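
The spacing computation in `collect_leaf` above mirrors the tree-based convertor: a punct is `Joint` only when the next raw token is itself a punct, and `Alone` when trivia or an open delimiter follows. A hypothetical check:

    // In `a += 1` the non-trivia tokens are `a`, `+`, `=`, `1`.
    let (subtree, _) = parse_to_token_tree("a += 1").unwrap();
    if let tt::TokenTree::Leaf(tt::Leaf::Punct(plus)) = &subtree.token_trees[1] {
        assert_eq!(plus.spacing, tt::Spacing::Joint); // `=` follows `+` directly
    }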
@@ -236,7 +419,7 @@ impl Convertor {
         };
         let delimiter = delimiter_kind.map(|kind| tt::Delimiter {
             kind,
-            id: self.alloc_delim(first_child.text_range(), last_child.text_range()),
+            id: self.id_alloc.delim(first_child.text_range(), last_child.text_range()),
         });
 
         let mut token_trees = Vec::new();
@@ -273,7 +456,7 @@ impl Convertor {
                         tt::Leaf::from(tt::Punct {
                             char,
                             spacing,
-                            id: self.alloc(token.text_range()),
+                            id: self.id_alloc.alloc(token.text_range()),
                         })
                         .into(),
                     );
@@ -282,7 +465,7 @@ impl Convertor {
                     macro_rules! make_leaf {
                        ($i:ident) => {
                            tt::$i {
-                                id: self.alloc(token.text_range()),
+                                id: self.id_alloc.alloc(token.text_range()),
                                text: token.text().clone(),
                            }
                            .into()
@@ -313,28 +496,6 @@ impl Convertor {
         let res = tt::Subtree { delimiter, token_trees };
         Some(res)
     }
-
-    fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
-        let relative_range = absolute_range - self.global_offset;
-        let token_id = tt::TokenId(self.next_id);
-        self.next_id += 1;
-        self.map.insert(token_id, relative_range);
-        token_id
-    }
-
-    fn alloc_delim(
-        &mut self,
-        open_abs_range: TextRange,
-        close_abs_range: TextRange,
-    ) -> tt::TokenId {
-        let open_relative_range = open_abs_range - self.global_offset;
-        let close_relative_range = close_abs_range - self.global_offset;
-        let token_id = tt::TokenId(self.next_id);
-        self.next_id += 1;
-
-        self.map.insert_delim(token_id, open_relative_range, close_relative_range);
-        token_id
-    }
 }
 
 struct TtTreeSink<'a> {
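
The remaining hunks are mechanical: `Convertor` now holds only a `TokenIdAlloc`, so each `self.alloc(..)` / `self.alloc_delim(..)` call becomes `self.id_alloc.alloc(..)` / `self.id_alloc.delim(..)`, and the old private helpers are deleted in favor of the shared allocator.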

crates/ra_mbe/src/tests.rs
Lines changed: 10 additions & 2 deletions

@@ -1499,12 +1499,20 @@ impl MacroFixture {
     }
 }
 
-pub(crate) fn parse_macro(macro_definition: &str) -> MacroFixture {
-    let source_file = ast::SourceFile::parse(macro_definition).ok().unwrap();
+pub(crate) fn parse_macro(ra_fixture: &str) -> MacroFixture {
+    let source_file = ast::SourceFile::parse(ra_fixture).ok().unwrap();
     let macro_definition =
         source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap();
 
     let (definition_tt, _) = ast_to_token_tree(&macro_definition.token_tree().unwrap()).unwrap();
+
+    let parsed = parse_to_token_tree(
+        &ra_fixture[macro_definition.token_tree().unwrap().syntax().text_range()],
+    )
+    .unwrap()
+    .0;
+    assert_eq!(definition_tt, parsed);
+
     let rules = MacroRules::parse(&definition_tt).unwrap();
     MacroFixture { rules }
 }
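
With this change every macro fixture in the suite doubles as a regression test for the new path: the definition's token tree is built once from the AST and once from the raw fixture text, and the two must be equal. For illustration (hypothetical fixture):

    let fixture = parse_macro("macro_rules! id { ($e:expr) => { $e } }");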

crates/ra_syntax/src/ast/make.rs
Lines changed: 6 additions & 0 deletions

@@ -267,6 +267,12 @@ pub mod tokens {
         sf.syntax().first_child_or_token().unwrap().into_token().unwrap()
     }
 
+    pub fn doc_comment(text: &str) -> SyntaxToken {
+        assert!(!text.trim().is_empty());
+        let sf = SourceFile::parse(text).ok().unwrap();
+        sf.syntax().first_child_or_token().unwrap().into_token().unwrap()
+    }
+
     pub fn literal(text: &str) -> SyntaxToken {
         assert_eq!(text.trim(), text);
         let lit: ast::Literal = super::ast_from_text(&format!("fn f() {{ let _ = {}; }}", text));
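
`doc_comment` is the small factory the raw convertor uses to feed comment text back into the existing `convert_doc_comment` logic: it reparses the text and returns the first token of the result, which is expected to be the comment itself. A hypothetical use:

    let token = doc_comment("/// Some docs");
    assert_eq!(token.kind(), SyntaxKind::COMMENT);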
