1
+ #include "tree_sitter/alloc.h"
1
2
#include "tree_sitter/parser.h"
3
+
2
4
#include <wctype.h>
3
5
4
6
enum TokenType {
5
7
STRING_CONTENT ,
6
- RAW_STRING_LITERAL ,
8
+ RAW_STRING_LITERAL_START ,
9
+ RAW_STRING_LITERAL_CONTENT ,
10
+ RAW_STRING_LITERAL_END ,
7
11
FLOAT_LITERAL ,
8
12
BLOCK_OUTER_DOC_MARKER ,
9
13
BLOCK_INNER_DOC_MARKER ,
@@ -12,15 +16,28 @@ enum TokenType {
12
16
ERROR_SENTINEL
13
17
};
14
18
15
- void * tree_sitter_rust_external_scanner_create () { return NULL ; }
19
/**
 * State kept by the external scanner.
 *
 * opening_hash_count holds the number of '#' characters counted by
 * scan_raw_string_start; scan_raw_string_content and scan_raw_string_end
 * read it back when looking for / consuming the closing delimiter.
 */
typedef struct {
    uint8_t opening_hash_count;
} Scanner;
16
22
17
- void tree_sitter_rust_external_scanner_destroy ( void * p ) {}
23
+ void * tree_sitter_rust_external_scanner_create ( ) { return ts_calloc ( 1 , sizeof ( Scanner )); }
18
24
19
- void tree_sitter_rust_external_scanner_reset (void * p ) {}
25
+ void tree_sitter_rust_external_scanner_destroy (void * payload ) { ts_free (( Scanner * ) payload ); }
20
26
21
- unsigned tree_sitter_rust_external_scanner_serialize (void * p , char * buffer ) { return 0 ; }
27
+ unsigned tree_sitter_rust_external_scanner_serialize (void * payload , char * buffer ) {
28
+ Scanner * scanner = (Scanner * )payload ;
29
+ buffer [0 ] = (char )scanner -> opening_hash_count ;
30
+ return 1 ;
31
+ }
22
32
23
- void tree_sitter_rust_external_scanner_deserialize (void * p , const char * b , unsigned n ) {}
33
+ void tree_sitter_rust_external_scanner_deserialize (void * payload , const char * buffer , unsigned length ) {
34
+ Scanner * scanner = (Scanner * )payload ;
35
+ scanner -> opening_hash_count = 0 ;
36
+ if (length == 1 ) {
37
+ Scanner * scanner = (Scanner * )payload ;
38
+ scanner -> opening_hash_count = buffer [0 ];
39
+ }
40
+ }
24
41
25
42
/* True when c is an underscore or a character iswdigit() classifies as a digit. */
static inline bool is_num_char(int32_t c) {
    if (c == '_') {
        return true;
    }
    return iswdigit(c) != 0;
}
26
43
@@ -45,8 +62,7 @@ static inline bool process_string(TSLexer *lexer) {
45
62
return has_content ;
46
63
}
47
64
48
- static inline bool process_raw_string (TSLexer * lexer ) {
49
- lexer -> result_symbol = RAW_STRING_LITERAL ;
65
+ static inline bool scan_raw_string_start (Scanner * scanner , TSLexer * lexer ) {
50
66
if (lexer -> lookahead == 'b' || lexer -> lookahead == 'c' ) {
51
67
advance (lexer );
52
68
}
@@ -55,7 +71,7 @@ static inline bool process_raw_string(TSLexer *lexer) {
55
71
}
56
72
advance (lexer );
57
73
58
- unsigned opening_hash_count = 0 ;
74
+ uint8_t opening_hash_count = 0 ;
59
75
while (lexer -> lookahead == '#' ) {
60
76
advance (lexer );
61
77
opening_hash_count ++ ;
@@ -65,20 +81,27 @@ static inline bool process_raw_string(TSLexer *lexer) {
65
81
return false;
66
82
}
67
83
advance (lexer );
84
+ scanner -> opening_hash_count = opening_hash_count ;
68
85
86
+ lexer -> result_symbol = RAW_STRING_LITERAL_START ;
87
+ return true;
88
+ }
89
+
90
+ static inline bool scan_raw_string_content (Scanner * scanner , TSLexer * lexer ) {
69
91
for (;;) {
70
92
if (lexer -> eof (lexer )) {
71
93
return false;
72
94
}
73
95
if (lexer -> lookahead == '"' ) {
96
+ lexer -> mark_end (lexer );
74
97
advance (lexer );
75
98
unsigned hash_count = 0 ;
76
- while (lexer -> lookahead == '#' && hash_count < opening_hash_count ) {
99
+ while (lexer -> lookahead == '#' && hash_count < scanner -> opening_hash_count ) {
77
100
advance (lexer );
78
101
hash_count ++ ;
79
102
}
80
- if (hash_count == opening_hash_count ) {
81
- lexer -> mark_end ( lexer ) ;
103
+ if (hash_count == scanner -> opening_hash_count ) {
104
+ lexer -> result_symbol = RAW_STRING_LITERAL_CONTENT ;
82
105
return true;
83
106
}
84
107
} else {
@@ -87,6 +110,15 @@ static inline bool process_raw_string(TSLexer *lexer) {
87
110
}
88
111
}
89
112
113
+ static inline bool scan_raw_string_end (Scanner * scanner , TSLexer * lexer ) {
114
+ advance (lexer );
115
+ for (unsigned i = 0 ; i < scanner -> opening_hash_count ; i ++ ) {
116
+ advance (lexer );
117
+ }
118
+ lexer -> result_symbol = RAW_STRING_LITERAL_END ;
119
+ return true;
120
+ }
121
+
90
122
static inline bool process_float_literal (TSLexer * lexer ) {
91
123
lexer -> result_symbol = FLOAT_LITERAL ;
92
124
@@ -321,7 +353,10 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
321
353
return false;
322
354
}
323
355
324
- if (valid_symbols [BLOCK_COMMENT_CONTENT ] || valid_symbols [BLOCK_INNER_DOC_MARKER ] || valid_symbols [BLOCK_OUTER_DOC_MARKER ]) {
356
+ Scanner * scanner = (Scanner * )payload ;
357
+
358
+ if (valid_symbols [BLOCK_COMMENT_CONTENT ] || valid_symbols [BLOCK_INNER_DOC_MARKER ] ||
359
+ valid_symbols [BLOCK_OUTER_DOC_MARKER ]) {
325
360
return process_block_comment (lexer , valid_symbols );
326
361
}
327
362
@@ -337,9 +372,17 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
337
372
skip (lexer );
338
373
}
339
374
340
- if (valid_symbols [RAW_STRING_LITERAL ] &&
375
+ if (valid_symbols [RAW_STRING_LITERAL_START ] &&
341
376
(lexer -> lookahead == 'r' || lexer -> lookahead == 'b' || lexer -> lookahead == 'c' )) {
342
- return process_raw_string (lexer );
377
+ return scan_raw_string_start (scanner , lexer );
378
+ }
379
+
380
+ if (valid_symbols [RAW_STRING_LITERAL_CONTENT ]) {
381
+ return scan_raw_string_content (scanner , lexer );
382
+ }
383
+
384
+ if (valid_symbols [RAW_STRING_LITERAL_END ] && lexer -> lookahead == '"' ) {
385
+ return scan_raw_string_end (scanner , lexer );
343
386
}
344
387
345
388
if (valid_symbols [FLOAT_LITERAL ] && iswdigit (lexer -> lookahead )) {
0 commit comments