Skip to content

Commit a13f2d1

Browse files
authored
Merge pull request #405 from eed3si9n/wip/array
refactor(scanner): use new array header for stack
2 parents (599d12b + 3fb3931), commit a13f2d1

File tree

4 files changed

+137
-186
lines changed

4 files changed

+137
-186
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -72,9 +72,6 @@ jobs:
7272
with:
7373
node-version: 20
7474

75-
- name: Test C stack code
76-
run: gcc test/test-stack.c -o a.out && ./a.out
77-
7875
- name: Generate parser from scratch and test it
7976
if: ${{ runner.os == 'Linux' || needs.changedfiles.outputs.c }}
8077
shell: bash

src/scanner.c

Lines changed: 137 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,17 @@
1-
#include "stack.h"
1+
#include "tree_sitter/alloc.h"
2+
#include "tree_sitter/array.h"
23
#include "tree_sitter/parser.h"
3-
#include <stdio.h>
4-
#include <string.h>
4+
55
#include <wctype.h>
66

7+
// #define DEBUG
8+
9+
#ifdef DEBUG
10+
#define LOG(...) fprintf(stderr, __VA_ARGS__)
11+
#else
12+
#define LOG(...)
13+
#endif
14+
715
enum TokenType {
816
AUTOMATIC_SEMICOLON,
917
INDENT,
@@ -22,26 +30,82 @@ enum TokenType {
2230
WITH,
2331
};
2432

33+
typedef struct {
34+
Array(int16_t) indents;
35+
int16_t last_indentation_size;
36+
int16_t last_newline_count;
37+
int16_t last_column;
38+
} Scanner;
39+
2540
void *tree_sitter_scala_external_scanner_create() {
26-
return createStack();
41+
Scanner *scanner = ts_calloc(1, sizeof(Scanner));
42+
array_init(&scanner->indents);
43+
scanner->last_indentation_size = -1;
44+
scanner->last_column = -1;
45+
return scanner;
2746
}
2847

2948
void tree_sitter_scala_external_scanner_destroy(void *payload) {
30-
free(payload);
49+
Scanner *scanner = payload;
50+
array_delete(&scanner->indents);
51+
ts_free(scanner);
3152
}
3253

3354
unsigned tree_sitter_scala_external_scanner_serialize(void *payload, char *buffer) {
34-
return serialiseStack(payload, buffer);
55+
Scanner *scanner = (Scanner*)payload;
56+
57+
if ((scanner->indents.size + 3) * sizeof(int16_t) > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
58+
return 0;
59+
}
60+
61+
size_t size = 0;
62+
*(int16_t *)&buffer[size] = scanner->last_indentation_size;
63+
size += sizeof(int16_t);
64+
*(int16_t *)&buffer[size] = scanner->last_newline_count;
65+
size += sizeof(int16_t);
66+
*(int16_t *)&buffer[size] = scanner->last_column;
67+
size += sizeof(int16_t);
68+
69+
for (unsigned i = 0; i < scanner->indents.size; i++) {
70+
*(int16_t *)&buffer[size] = scanner->indents.contents[i];
71+
size += sizeof(int16_t);
72+
}
73+
74+
return size;
3575
}
3676

3777
void tree_sitter_scala_external_scanner_deserialize(void *payload, const char *buffer,
3878
unsigned length) {
39-
deserialiseStack(payload, buffer, length);
79+
Scanner *scanner = (Scanner*)payload;
80+
array_clear(&scanner->indents);
81+
scanner->last_indentation_size = -1;
82+
scanner->last_column = -1;
83+
scanner->last_newline_count = 0;
84+
85+
if (length == 0) {
86+
return;
87+
}
88+
89+
size_t size = 0;
90+
91+
scanner->last_indentation_size = *(int16_t *)&buffer[size];
92+
size += sizeof(int16_t);
93+
scanner->last_newline_count = *(int16_t *)&buffer[size];
94+
size += sizeof(int16_t);
95+
scanner->last_column = *(int16_t *)&buffer[size];
96+
size += sizeof(int16_t);
97+
98+
while (size < length) {
99+
array_push(&scanner->indents, *(int16_t *)&buffer[size]);
100+
size += sizeof(int16_t);
101+
}
102+
103+
assert(size == length);
40104
}
41105

42-
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
106+
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
43107

44-
static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
108+
static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
45109

46110
static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_interpolation) {
47111
unsigned closing_quote_count = 0;
@@ -102,7 +166,7 @@ static bool detect_comment_start(TSLexer *lexer) {
102166
}
103167

104168
static bool scan_word(TSLexer *lexer, const char* const word) {
105-
for (int i = 0; word[i] != '\0'; i++) {
169+
for (uint8_t i = 0; word[i] != '\0'; i++) {
106170
if (lexer->lookahead != word[i]) {
107171
return false;
108172
}
@@ -111,12 +175,20 @@ static bool scan_word(TSLexer *lexer, const char* const word) {
111175
return !iswalnum(lexer->lookahead);
112176
}
113177

178+
static inline void debug_indents(Scanner *scanner) {
179+
LOG(" indents(%d): ", scanner->indents.size);
180+
for (unsigned i = 0; i < scanner->indents.size; i++) {
181+
LOG("%d ", scanner->indents.contents[i]);
182+
}
183+
LOG("\n");
184+
}
185+
114186
bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
115187
const bool *valid_symbols) {
116-
ScannerStack *stack = (ScannerStack *)payload;
117-
int prev = peekStack(stack);
118-
int newline_count = 0;
119-
int indentation_size = 0;
188+
Scanner *scanner = (Scanner *)payload;
189+
int16_t prev = scanner->indents.size > 0 ? *array_back(&scanner->indents) : -1;
190+
int16_t newline_count = 0;
191+
int16_t indentation_size = 0;
120192

121193
while (iswspace(lexer->lookahead)) {
122194
if (lexer->lookahead == '\n') {
@@ -130,35 +202,47 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
130202
}
131203

132204
// Before advancing the lexer, check if we can double outdent
133-
if (valid_symbols[OUTDENT] &&
134-
(lexer->lookahead == 0 ||
205+
if (
206+
valid_symbols[OUTDENT] &&
135207
(
136-
(prev != -1) &&
137-
lexer->lookahead == ')' ||
138-
lexer->lookahead == ']' ||
139-
lexer->lookahead == '}'
140-
) || (
141-
stack->last_indentation_size != -1 &&
142-
prev != -1 &&
143-
stack->last_indentation_size < prev))) {
144-
popStack(stack);
208+
lexer->lookahead == 0 ||
209+
(
210+
prev != -1 &&
211+
(
212+
lexer->lookahead == ')' ||
213+
lexer->lookahead == ']' ||
214+
lexer->lookahead == '}'
215+
)
216+
) ||
217+
(
218+
scanner->last_indentation_size != -1 &&
219+
prev != -1 &&
220+
scanner->last_indentation_size < prev
221+
)
222+
)
223+
) {
224+
if (scanner->indents.size > 0) {
225+
array_pop(&scanner->indents);
226+
}
145227
LOG(" pop\n");
146228
LOG(" OUTDENT\n");
147229
lexer->result_symbol = OUTDENT;
148230
return true;
149231
}
150-
stack->last_indentation_size = -1;
151-
152-
printStack(stack, " before");
232+
scanner->last_indentation_size = -1;
153233

154-
if (valid_symbols[INDENT] &&
234+
if (
235+
valid_symbols[INDENT] &&
155236
newline_count > 0 &&
156-
(isEmptyStack(stack) ||
157-
indentation_size > peekStack(stack))) {
237+
(
238+
scanner->indents.size == 0 ||
239+
indentation_size > *array_back(&scanner->indents)
240+
)
241+
) {
158242
if (detect_comment_start(lexer)) {
159243
return false;
160244
}
161-
pushStack(stack, indentation_size);
245+
array_push(&scanner->indents, indentation_size);
162246
lexer->result_symbol = INDENT;
163247
LOG(" INDENT\n");
164248
return true;
@@ -167,38 +251,44 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
167251
// This saves the indentation_size and newline_count so they can be used
168252
// in subsequent calls for multiple outdent or autosemicolon.
169253
if (valid_symbols[OUTDENT] &&
170-
(lexer->lookahead == 0 || (
254+
(lexer->lookahead == 0 ||
255+
(
171256
newline_count > 0 &&
172257
prev != -1 &&
173-
indentation_size < prev))) {
174-
popStack(stack);
258+
indentation_size < prev
259+
)
260+
)
261+
) {
262+
if (scanner->indents.size > 0) {
263+
array_pop(&scanner->indents);
264+
}
175265
LOG(" pop\n");
176266
LOG(" OUTDENT\n");
177267
lexer->result_symbol = OUTDENT;
178268
lexer->mark_end(lexer);
179269
if (detect_comment_start(lexer)) {
180270
return false;
181271
}
182-
stack->last_indentation_size = indentation_size;
183-
stack->last_newline_count = newline_count;
272+
scanner->last_indentation_size = indentation_size;
273+
scanner->last_newline_count = newline_count;
184274
if (lexer->eof(lexer)) {
185-
stack->last_column = -1;
275+
scanner->last_column = -1;
186276
} else {
187-
stack->last_column = (int)lexer->get_column(lexer);
277+
scanner->last_column = (int16_t)lexer->get_column(lexer);
188278
}
189279
return true;
190280
}
191281

192282
// Recover newline_count from the outdent reset
193283
bool is_eof = lexer->eof(lexer);
194-
if (stack->last_newline_count > 0 &&
195-
((is_eof && stack->last_column == -1) ||
196-
(!is_eof && lexer->get_column(lexer) == stack->last_column))) {
197-
newline_count += stack->last_newline_count;
284+
if (
285+
scanner->last_newline_count > 0 &&
286+
(is_eof && scanner->last_column == -1) ||
287+
(!is_eof && lexer->get_column(lexer) == (uint32_t)scanner->last_column)
288+
) {
289+
newline_count += scanner->last_newline_count;
198290
}
199-
stack->last_newline_count = 0;
200-
201-
printStack(stack, " after");
291+
scanner->last_newline_count = 0;
202292

203293
if (valid_symbols[AUTOMATIC_SEMICOLON] && newline_count > 0) {
204294
// AUTOMATIC_SEMICOLON should not be issued in the middle of expressions
@@ -240,7 +330,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
240330
}
241331
skip(lexer);
242332
}
243-
// If some code is present on the same line after the comment ends,
333+
// If some code is present on the same line after the comment ends,
244334
// we should still produce AUTOMATIC_SEMICOLON, e.g. in
245335
// val a = 1
246336
// /* comment */ val b = 2

src/stack.h

Lines changed: 0 additions & 90 deletions
This file was deleted.

0 commit comments

Comments
 (0)