use ra_parser::{FragmentKind, ParseError, TreeSink};
use ra_syntax::{
-    ast, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind, SyntaxKind::*, SyntaxNode,
-    SyntaxTreeBuilder, TextRange, TextUnit, T,
+    ast::{self, make::tokens::doc_comment},
+    tokenize, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind,
+    SyntaxKind::*,
+    SyntaxNode, SyntaxTreeBuilder, TextRange, TextUnit, Token, T,
};
use rustc_hash::FxHashMap;
use std::iter::successors;
@@ -48,9 +50,11 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenM
/// will consume).
pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> {
    let global_offset = node.text_range().start();
-    let mut c = Convertor { map: TokenMap::default(), global_offset, next_id: 0 };
+    let mut c = Convertor {
+        id_alloc: TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 },
+    };
    let subtree = c.go(node)?;
-    Some((subtree, c.map))
+    Some((subtree, c.id_alloc.map))
}

// The following items are what a `rustc` macro can be parsed into:
@@ -89,6 +93,28 @@ pub fn token_tree_to_syntax_node(
    Ok((parse, range_map))
}

+/// Convert a string to a `TokenTree`.
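+/// Returns `None` if `text` does not lex cleanly or yields no tokens.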
+pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
+    let (tokens, errors) = tokenize(text);
+    if !errors.is_empty() {
+        return None;
+    }
+
+    let mut conv = RawConvertor {
+        text,
+        offset: TextUnit::default(),
+        inner: tokens.iter(),
+        id_alloc: TokenIdAlloc {
+            map: Default::default(),
+            global_offset: TextUnit::default(),
+            next_id: 0,
+        },
+    };
+
+    let subtree = conv.go()?;
+    Some((subtree, conv.id_alloc.map))
+}
+
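+// Editor's note: a minimal usage sketch, not part of this change. It assumes
+// the `Display` impl that `tt::Subtree` has in the `tt` crate:
+//
+//     let (subtree, _map) = parse_to_token_tree("struct S;").unwrap();
+//     let _rendered = subtree.to_string();
+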
impl TokenMap {
    pub fn token_by_range(&self, relative_range: TextRange) -> Option<tt::TokenId> {
        let &(token_id, _) = self.entries.iter().find(|(_, range)| match range {
@@ -118,6 +144,14 @@ impl TokenMap {
        self.entries
            .push((token_id, TokenTextRange::Delimiter(open_relative_range, close_relative_range)));
    }
+
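+    /// Patches the close range of an already-recorded delimiter pair.
+    /// (`TokenIdAlloc::open_delim` records the open range for both ends as a
+    /// placeholder; this fills in the real close range once it is known.)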
+    fn update_close_delim(&mut self, token_id: tt::TokenId, close_relative_range: TextRange) {
+        if let Some(entry) = self.entries.iter_mut().find(|(tid, _)| *tid == token_id) {
+            if let TokenTextRange::Delimiter(dim, _) = entry.1 {
+                entry.1 = TokenTextRange::Delimiter(dim, close_relative_range);
+            }
+        }
+    }
}

/// Returns the textual content of a doc comment block as a quoted string
@@ -188,12 +222,161 @@ fn convert_doc_comment(token: &ra_syntax::SyntaxToken) -> Option<Vec<tt::TokenTr
    }
}

-struct Convertor {
+struct TokenIdAlloc {
    map: TokenMap,
    global_offset: TextUnit,
    next_id: u32,
}

+impl TokenIdAlloc {
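+    /// Allocates the next sequential `TokenId` and records the token's range
+    /// relative to `global_offset` (the start of the node being converted).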
+    fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
+        let relative_range = absolute_range - self.global_offset;
+        let token_id = tt::TokenId(self.next_id);
+        self.next_id += 1;
+        self.map.insert(token_id, relative_range);
+        token_id
+    }
+
+    fn delim(&mut self, open_abs_range: TextRange, close_abs_range: TextRange) -> tt::TokenId {
+        let open_relative_range = open_abs_range - self.global_offset;
+        let close_relative_range = close_abs_range - self.global_offset;
+        let token_id = tt::TokenId(self.next_id);
+        self.next_id += 1;
+
+        self.map.insert_delim(token_id, open_relative_range, close_relative_range);
+        token_id
+    }
+
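+    /// Allocates a delimiter id from the open token alone; a placeholder close
+    /// range is recorded and later patched via `close_delim`. The ranges are
+    /// used as-is here, which works for `RawConvertor` since its
+    /// `global_offset` is always zero.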
+    fn open_delim(&mut self, open_abs_range: TextRange) -> tt::TokenId {
+        let token_id = tt::TokenId(self.next_id);
+        self.next_id += 1;
+        self.map.insert_delim(token_id, open_abs_range, open_abs_range);
+        token_id
+    }
+
+    fn close_delim(&mut self, id: tt::TokenId, close_abs_range: TextRange) {
+        self.map.update_close_delim(id, close_abs_range);
+    }
+}
+
+/// A converter for raw tokens (straight from the lexer).
+struct RawConvertor<'a> {
+    text: &'a str,
+    offset: TextUnit,
+    id_alloc: TokenIdAlloc,
+    inner: std::slice::Iter<'a, Token>,
+}
+
+impl RawConvertor<'_> {
+    fn go(&mut self) -> Option<tt::Subtree> {
+        let mut subtree = tt::Subtree::default();
+        subtree.delimiter = None;
+        while self.peek().is_some() {
+            self.collect_leaf(&mut subtree.token_trees);
+        }
+        if subtree.token_trees.is_empty() {
+            return None;
+        }
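+        // If the input was exactly one delimited group (e.g. a single
+        // `{ ... }` block), return that subtree directly instead of wrapping
+        // it in a second, delimiter-less one.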
+        if subtree.token_trees.len() == 1 {
+            if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] {
+                return Some(first.clone());
+            }
+        }
+        Some(subtree)
+    }
+
+    fn bump(&mut self) -> Option<(Token, TextRange)> {
+        let token = self.inner.next()?;
+        let range = TextRange::offset_len(self.offset, token.len);
+        self.offset += token.len;
+        Some((*token, range))
+    }
+
+    fn peek(&self) -> Option<Token> {
+        self.inner.as_slice().get(0).cloned()
+    }
+
+    fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) {
+        let (token, range) = match self.bump() {
+            None => return,
+            Some(it) => it,
+        };
+
+        let k: SyntaxKind = token.kind;
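+        // Doc comments are expanded into the corresponding `#[doc = "..."]`
+        // token trees; plain comments yield `None` and are simply dropped.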
+        if k == COMMENT {
+            let node = doc_comment(&self.text[range]);
+            if let Some(tokens) = convert_doc_comment(&node) {
+                result.extend(tokens);
+            }
+            return;
+        }
+
+        result.push(if k.is_punct() {
+            let delim = match k {
+                T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])),
+                T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])),
+                T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])),
+                _ => None,
+            };
+
+            if let Some((kind, closed)) = delim {
+                let mut subtree = tt::Subtree::default();
+                let id = self.id_alloc.open_delim(range);
+                subtree.delimiter = Some(tt::Delimiter { kind, id });
+
+                while self.peek().map(|it| it.kind != closed).unwrap_or(false) {
+                    self.collect_leaf(&mut subtree.token_trees);
+                }
+                let last_range = match self.bump() {
+                    None => return,
+                    Some(it) => it.1,
+                };
+                self.id_alloc.close_delim(id, last_range);
+                subtree.into()
+            } else {
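+                // A punct is `Joint` only when the next token is also
+                // punctuation (so `=` followed by `>` can fuse into `=>`);
+                // trivia or an opening delimiter forces `Alone`.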
+                let spacing = match self.peek() {
+                    Some(next)
+                        if next.kind.is_trivia()
+                            || next.kind == T!['[']
+                            || next.kind == T!['{']
+                            || next.kind == T!['('] =>
+                    {
+                        tt::Spacing::Alone
+                    }
+                    Some(next) if next.kind.is_punct() => tt::Spacing::Joint,
+                    _ => tt::Spacing::Alone,
+                };
+                let char =
+                    self.text[range].chars().next().expect("Token from lexer must be single char");
+
+                tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc.alloc(range) }).into()
+            }
+        } else {
+            macro_rules! make_leaf {
+                ($i:ident) => {
+                    tt::$i { id: self.id_alloc.alloc(range), text: self.text[range].into() }.into()
+                };
+            }
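+            // `true` and `false` are keyword tokens to the lexer, but they are
+            // mapped to literal leaves so that e.g. a `$x:literal` fragment can
+            // match them.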
+            let leaf: tt::Leaf = match k {
+                T![true] | T![false] => make_leaf!(Literal),
+                IDENT | LIFETIME => make_leaf!(Ident),
+                k if k.is_keyword() => make_leaf!(Ident),
+                k if k.is_literal() => make_leaf!(Literal),
+                _ => return,
+            };
+
+            leaf.into()
+        });
+    }
+}
+
+// FIXME: there is some logic duplicated between `RawConvertor` and `Convertor`.
+// It would be nice to refactor by converting `SyntaxNode` into `ra_parser::Token`s
+// and reusing `RawConvertor` directly, but performance-wise that may not be a
+// good idea.
+struct Convertor {
+    id_alloc: TokenIdAlloc,
+}
+
impl Convertor {
    fn go(&mut self, tt: &SyntaxNode) -> Option<tt::Subtree> {
        // This tree is empty
@@ -236,7 +419,7 @@ impl Convertor {
        };
        let delimiter = delimiter_kind.map(|kind| tt::Delimiter {
            kind,
-            id: self.alloc_delim(first_child.text_range(), last_child.text_range()),
+            id: self.id_alloc.delim(first_child.text_range(), last_child.text_range()),
        });

        let mut token_trees = Vec::new();
@@ -273,7 +456,7 @@ impl Convertor {
                        tt::Leaf::from(tt::Punct {
                            char,
                            spacing,
-                            id: self.alloc(token.text_range()),
+                            id: self.id_alloc.alloc(token.text_range()),
                        })
                        .into(),
                    );
@@ -282,7 +465,7 @@ impl Convertor {
                    macro_rules! make_leaf {
                        ($i:ident) => {
                            tt::$i {
-                                id: self.alloc(token.text_range()),
+                                id: self.id_alloc.alloc(token.text_range()),
                                text: token.text().clone(),
                            }
                            .into()
@@ -313,28 +496,6 @@ impl Convertor {
        let res = tt::Subtree { delimiter, token_trees };
        Some(res)
    }
-
-    fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
-        let relative_range = absolute_range - self.global_offset;
-        let token_id = tt::TokenId(self.next_id);
-        self.next_id += 1;
-        self.map.insert(token_id, relative_range);
-        token_id
-    }
-
-    fn alloc_delim(
-        &mut self,
-        open_abs_range: TextRange,
-        close_abs_range: TextRange,
-    ) -> tt::TokenId {
-        let open_relative_range = open_abs_range - self.global_offset;
-        let close_relative_range = close_abs_range - self.global_offset;
-        let token_id = tt::TokenId(self.next_id);
-        self.next_id += 1;
-
-        self.map.insert_delim(token_id, open_relative_range, close_relative_range);
-        token_id
-    }
}

struct TtTreeSink<'a> {