1
- #include "stack.h"
1
+ #include "tree_sitter/alloc.h"
2
+ #include "tree_sitter/array.h"
2
3
#include "tree_sitter/parser.h"
3
- #include <stdio.h>
4
- #include <string.h>
4
+
5
5
#include <wctype.h>
6
6
7
// Uncomment the next line to trace scanner decisions on stderr.
// #define DEBUG

// LOG(fmt, ...) forwards its arguments to fprintf(stderr, ...) when DEBUG is
// defined and expands to nothing otherwise, so in non-debug builds the
// arguments are not evaluated at all.
//
// There must be no whitespace between the macro name and the opening
// parenthesis: `#define LOG (...)` would declare an object-like macro and
// every LOG(...) call site would fail to compile.
#ifdef DEBUG
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#else
#define LOG(...)
#endif
14
+
7
15
enum TokenType {
8
16
AUTOMATIC_SEMICOLON ,
9
17
INDENT ,
@@ -22,26 +30,82 @@ enum TokenType {
22
30
WITH ,
23
31
};
24
32
33
+ typedef struct {
34
+ Array (int16_t ) indents ;
35
+ int16_t last_indentation_size ;
36
+ int16_t last_newline_count ;
37
+ int16_t last_column ;
38
+ } Scanner ;
39
+
25
40
void * tree_sitter_scala_external_scanner_create () {
26
- return createStack ();
41
+ Scanner * scanner = ts_calloc (1 , sizeof (Scanner ));
42
+ array_init (& scanner -> indents );
43
+ scanner -> last_indentation_size = -1 ;
44
+ scanner -> last_column = -1 ;
45
+ return scanner ;
27
46
}
28
47
29
48
void tree_sitter_scala_external_scanner_destroy (void * payload ) {
30
- free (payload );
49
+ Scanner * scanner = payload ;
50
+ array_delete (& scanner -> indents );
51
+ ts_free (scanner );
31
52
}
32
53
33
54
unsigned tree_sitter_scala_external_scanner_serialize (void * payload , char * buffer ) {
34
- return serialiseStack (payload , buffer );
55
+ Scanner * scanner = (Scanner * )payload ;
56
+
57
+ if ((scanner -> indents .size + 3 ) * sizeof (int16_t ) > TREE_SITTER_SERIALIZATION_BUFFER_SIZE ) {
58
+ return 0 ;
59
+ }
60
+
61
+ size_t size = 0 ;
62
+ * (int16_t * )& buffer [size ] = scanner -> last_indentation_size ;
63
+ size += sizeof (int16_t );
64
+ * (int16_t * )& buffer [size ] = scanner -> last_newline_count ;
65
+ size += sizeof (int16_t );
66
+ * (int16_t * )& buffer [size ] = scanner -> last_column ;
67
+ size += sizeof (int16_t );
68
+
69
+ for (unsigned i = 0 ; i < scanner -> indents .size ; i ++ ) {
70
+ * (int16_t * )& buffer [size ] = scanner -> indents .contents [i ];
71
+ size += sizeof (int16_t );
72
+ }
73
+
74
+ return size ;
35
75
}
36
76
37
77
void tree_sitter_scala_external_scanner_deserialize (void * payload , const char * buffer ,
38
78
unsigned length ) {
39
- deserialiseStack (payload , buffer , length );
79
+ Scanner * scanner = (Scanner * )payload ;
80
+ array_clear (& scanner -> indents );
81
+ scanner -> last_indentation_size = -1 ;
82
+ scanner -> last_column = -1 ;
83
+ scanner -> last_newline_count = 0 ;
84
+
85
+ if (length == 0 ) {
86
+ return ;
87
+ }
88
+
89
+ size_t size = 0 ;
90
+
91
+ scanner -> last_indentation_size = * (int16_t * )& buffer [size ];
92
+ size += sizeof (int16_t );
93
+ scanner -> last_newline_count = * (int16_t * )& buffer [size ];
94
+ size += sizeof (int16_t );
95
+ scanner -> last_column = * (int16_t * )& buffer [size ];
96
+ size += sizeof (int16_t );
97
+
98
+ while (size < length ) {
99
+ array_push (& scanner -> indents , * (int16_t * )& buffer [size ]);
100
+ size += sizeof (int16_t );
101
+ }
102
+
103
+ assert (size == length );
40
104
}
41
105
42
- static void advance (TSLexer * lexer ) { lexer -> advance (lexer , false); }
106
+ static inline void advance (TSLexer * lexer ) { lexer -> advance (lexer , false); }
43
107
44
- static void skip (TSLexer * lexer ) { lexer -> advance (lexer , true); }
108
+ static inline void skip (TSLexer * lexer ) { lexer -> advance (lexer , true); }
45
109
46
110
static bool scan_string_content (TSLexer * lexer , bool is_multiline , bool has_interpolation ) {
47
111
unsigned closing_quote_count = 0 ;
@@ -102,7 +166,7 @@ static bool detect_comment_start(TSLexer *lexer) {
102
166
}
103
167
104
168
static bool scan_word (TSLexer * lexer , const char * const word ) {
105
- for (int i = 0 ; word [i ] != '\0' ; i ++ ) {
169
+ for (uint8_t i = 0 ; word [i ] != '\0' ; i ++ ) {
106
170
if (lexer -> lookahead != word [i ]) {
107
171
return false;
108
172
}
@@ -111,12 +175,20 @@ static bool scan_word(TSLexer *lexer, const char* const word) {
111
175
return !iswalnum (lexer -> lookahead );
112
176
}
113
177
178
+ static inline void debug_indents (Scanner * scanner ) {
179
+ LOG (" indents(%d): " , scanner -> indents .size );
180
+ for (unsigned i = 0 ; i < scanner -> indents .size ; i ++ ) {
181
+ LOG ("%d " , scanner -> indents .contents [i ]);
182
+ }
183
+ LOG ("\n" );
184
+ }
185
+
114
186
bool tree_sitter_scala_external_scanner_scan (void * payload , TSLexer * lexer ,
115
187
const bool * valid_symbols ) {
116
- ScannerStack * stack = (ScannerStack * )payload ;
117
- int prev = peekStack ( stack ) ;
118
- int newline_count = 0 ;
119
- int indentation_size = 0 ;
188
+ Scanner * scanner = (Scanner * )payload ;
189
+ int16_t prev = scanner -> indents . size > 0 ? * array_back ( & scanner -> indents ) : -1 ;
190
+ int16_t newline_count = 0 ;
191
+ int16_t indentation_size = 0 ;
120
192
121
193
while (iswspace (lexer -> lookahead )) {
122
194
if (lexer -> lookahead == '\n' ) {
@@ -130,35 +202,47 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
130
202
}
131
203
132
204
// Before advancing the lexer, check if we can double outdent
133
- if (valid_symbols [ OUTDENT ] &&
134
- ( lexer -> lookahead == 0 ||
205
+ if (
206
+ valid_symbols [ OUTDENT ] &&
135
207
(
136
- (prev != -1 ) &&
137
- lexer -> lookahead == ')' ||
138
- lexer -> lookahead == ']' ||
139
- lexer -> lookahead == '}'
140
- ) || (
141
- stack -> last_indentation_size != -1 &&
142
- prev != -1 &&
143
- stack -> last_indentation_size < prev ))) {
144
- popStack (stack );
208
+ lexer -> lookahead == 0 ||
209
+ (
210
+ prev != -1 &&
211
+ (
212
+ lexer -> lookahead == ')' ||
213
+ lexer -> lookahead == ']' ||
214
+ lexer -> lookahead == '}'
215
+ )
216
+ ) ||
217
+ (
218
+ scanner -> last_indentation_size != -1 &&
219
+ prev != -1 &&
220
+ scanner -> last_indentation_size < prev
221
+ )
222
+ )
223
+ ) {
224
+ if (scanner -> indents .size > 0 ) {
225
+ array_pop (& scanner -> indents );
226
+ }
145
227
LOG (" pop\n" );
146
228
LOG (" OUTDENT\n" );
147
229
lexer -> result_symbol = OUTDENT ;
148
230
return true;
149
231
}
150
- stack -> last_indentation_size = -1 ;
151
-
152
- printStack (stack , " before" );
232
+ scanner -> last_indentation_size = -1 ;
153
233
154
- if (valid_symbols [INDENT ] &&
234
+ if (
235
+ valid_symbols [INDENT ] &&
155
236
newline_count > 0 &&
156
- (isEmptyStack (stack ) ||
157
- indentation_size > peekStack (stack ))) {
237
+ (
238
+ scanner -> indents .size == 0 ||
239
+ indentation_size > * array_back (& scanner -> indents )
240
+ )
241
+ ) {
158
242
if (detect_comment_start (lexer )) {
159
243
return false;
160
244
}
161
- pushStack ( stack , indentation_size );
245
+ array_push ( & scanner -> indents , indentation_size );
162
246
lexer -> result_symbol = INDENT ;
163
247
LOG (" INDENT\n" );
164
248
return true;
@@ -167,38 +251,44 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
167
251
// This saves the indentation_size and newline_count so it can be used
168
252
// in subsequent calls for multiple outdent or autosemicolon.
169
253
if (valid_symbols [OUTDENT ] &&
170
- (lexer -> lookahead == 0 || (
254
+ (lexer -> lookahead == 0 ||
255
+ (
171
256
newline_count > 0 &&
172
257
prev != -1 &&
173
- indentation_size < prev ))) {
174
- popStack (stack );
258
+ indentation_size < prev
259
+ )
260
+ )
261
+ ) {
262
+ if (scanner -> indents .size > 0 ) {
263
+ array_pop (& scanner -> indents );
264
+ }
175
265
LOG (" pop\n" );
176
266
LOG (" OUTDENT\n" );
177
267
lexer -> result_symbol = OUTDENT ;
178
268
lexer -> mark_end (lexer );
179
269
if (detect_comment_start (lexer )) {
180
270
return false;
181
271
}
182
- stack -> last_indentation_size = indentation_size ;
183
- stack -> last_newline_count = newline_count ;
272
+ scanner -> last_indentation_size = indentation_size ;
273
+ scanner -> last_newline_count = newline_count ;
184
274
if (lexer -> eof (lexer )) {
185
- stack -> last_column = -1 ;
275
+ scanner -> last_column = -1 ;
186
276
} else {
187
- stack -> last_column = (int )lexer -> get_column (lexer );
277
+ scanner -> last_column = (int16_t )lexer -> get_column (lexer );
188
278
}
189
279
return true;
190
280
}
191
281
192
282
// Recover newline_count from the outdent reset
193
283
bool is_eof = lexer -> eof (lexer );
194
- if (stack -> last_newline_count > 0 &&
195
- ((is_eof && stack -> last_column == -1 ) ||
196
- (!is_eof && lexer -> get_column (lexer ) == stack -> last_column ))) {
197
- newline_count += stack -> last_newline_count ;
284
+ if (
285
+ scanner -> last_newline_count > 0 &&
286
+ (is_eof && scanner -> last_column == -1 ) ||
287
+ (!is_eof && lexer -> get_column (lexer ) == (uint32_t )scanner -> last_column )
288
+ ) {
289
+ newline_count += scanner -> last_newline_count ;
198
290
}
199
- stack -> last_newline_count = 0 ;
200
-
201
- printStack (stack , " after" );
291
+ scanner -> last_newline_count = 0 ;
202
292
203
293
if (valid_symbols [AUTOMATIC_SEMICOLON ] && newline_count > 0 ) {
204
294
// AUTOMATIC_SEMICOLON should not be issued in the middle of expressions
@@ -240,7 +330,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
240
330
}
241
331
skip (lexer );
242
332
}
243
- // If some code is present at the same line after comment end,
333
+ // If some code is present at the same line after comment end,
244
334
// we should still produce AUTOMATIC_SEMICOLON, e.g. in
245
335
// val a = 1
246
336
// /* comment */ val b = 2
0 commit comments