Skip to content

Commit a31fe42

Browse files
committed
designate doc comments
1 parent eeb0702 commit a31fe42

File tree

3 files changed

+155
-10
lines changed

3 files changed

+155
-10
lines changed

corpus/source_files.txt

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,76 @@ Line comments
5555
(source_file
5656
(line_comment))
5757

58+
============================================
59+
Doc comments
60+
============================================
61+
62+
/// Doc
63+
/// Comment
64+
// / Now a line comment (note the space separating the third slash)
65+
66+
/// Doc
67+
/// Comment
68+
//// Four slashes make the line a normal comment
69+
/// Doc comment got interrupted by the line above
70+
71+
//! Inner doc comment line 1
72+
//! Inner doc comment line 2
73+
/// This is a different doc comment since the line starts differently
74+
//! Back to inner doc comment
75+
76+
----
77+
78+
(source_file
79+
(doc_comment)
80+
(line_comment)
81+
(doc_comment)
82+
(line_comment)
83+
(doc_comment)
84+
(doc_comment)
85+
(doc_comment)
86+
(doc_comment))
87+
88+
=================================================
89+
Doc comments recursion guard 1 (should not hang)
90+
=================================================
91+
92+
//!
93+
94+
---
95+
96+
(source_file (doc_comment))
97+
98+
=================================================
99+
Doc comments recursion guard 2 (should not hang)
100+
=================================================
101+
102+
///
103+
104+
---
105+
106+
(source_file (doc_comment))
107+
108+
=================================================
109+
Comments recursion guard 1 (should not hang)
110+
=================================================
111+
112+
//
113+
114+
---
115+
116+
(source_file (line_comment))
117+
118+
=================================================
119+
Block comments recursion guard 1 (should not hang)
120+
=================================================
121+
122+
/*
123+
124+
---
125+
126+
(source_file (ERROR))
127+
58128
=====================================
59129
Greek letters in identifiers
60130
=====================================

grammar.js

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@ const primitive_types = numeric_types.concat(['bool', 'str', 'char'])
3838
module.exports = grammar({
3939
name: 'rust',
4040

41-
extras: $ => [/\s/, $.line_comment, $.block_comment],
41+
extras: $ => [/\s/, $.line_comment, $.block_comment, $.doc_comment],
4242

4343
externals: $ => [
4444
$._string_content,
4545
$.raw_string_literal,
4646
$.float_literal,
4747
$.block_comment,
48+
$.line_comment,
49+
$.doc_comment
4850
],
4951

5052
supertypes: $ => [
@@ -1426,15 +1428,6 @@ module.exports = grammar({
14261428

14271429
boolean_literal: $ => choice('true', 'false'),
14281430

1429-
comment: $ => choice(
1430-
$.line_comment,
1431-
$.block_comment
1432-
),
1433-
1434-
line_comment: $ => token(seq(
1435-
'//', /.*/
1436-
)),
1437-
14381431
_path: $ => choice(
14391432
$.self,
14401433
alias(choice(...primitive_types), $.identifier),

src/scanner.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ enum TokenType {
66
RAW_STRING_LITERAL,
77
FLOAT_LITERAL,
88
BLOCK_COMMENT,
9+
LINE_COMMENT,
10+
DOC_COMMENT,
911
};
1012

1113
void *tree_sitter_rust_external_scanner_create() { return NULL; }
@@ -143,7 +145,87 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer,
143145

144146
if (lexer->lookahead == '/') {
145147
advance(lexer);
148+
149+
if ((valid_symbols[LINE_COMMENT] || valid_symbols[DOC_COMMENT]) && lexer->lookahead == '/') {
150+
advance(lexer);
151+
152+
bool started_with_slash = lexer->lookahead == '/';
153+
switch (lexer->lookahead) {
154+
case '!':
155+
case '/': {
156+
advance(lexer);
157+
158+
// If three consecutive slashes were seen and this is the fourth one,
159+
// the line turns back to a normal comment.
160+
// The above rule does not apply to "//!", which is also a doc
161+
// comment, which is why started_with_slash must be tracked.
162+
if (started_with_slash == false || lexer->lookahead != '/') {
163+
lexer->result_symbol = DOC_COMMENT;
164+
165+
while (true) {
166+
while (true) {
167+
switch (lexer->lookahead) {
168+
case '\n': {
169+
goto finished_doc_comment_line;
170+
}
171+
case 0: {
172+
goto doc_comment_exit;
173+
}
174+
default: {
175+
advance(lexer);
176+
}
177+
}
178+
}
179+
180+
finished_doc_comment_line:
181+
182+
lexer->mark_end(lexer);
183+
advance(lexer);
184+
if (lexer->lookahead == '/') {
185+
advance(lexer);
186+
if (lexer->lookahead == '/') {
187+
advance(lexer);
188+
if (started_with_slash) {
189+
if (lexer->lookahead == '/') {
190+
advance(lexer);
191+
// If a fourth slash is found, the line turns back to a normal comment
192+
if (lexer->lookahead == '/') {
193+
break;
194+
}
195+
} else {
196+
break;
197+
}
198+
} else if (lexer->lookahead != '!') {
199+
break;
200+
}
201+
} else {
202+
break;
203+
}
204+
} else {
205+
break;
206+
}
207+
}
208+
}
209+
210+
break;
211+
}
212+
}
213+
214+
doc_comment_exit:
215+
216+
// Might have already processed a doc comment in the loop above
217+
if (lexer->result_symbol != DOC_COMMENT) {
218+
lexer->result_symbol = LINE_COMMENT;
219+
while (lexer->lookahead != '\n' && lexer->lookahead != 0) {
220+
advance(lexer);
221+
}
222+
}
223+
224+
return true;
225+
}
226+
146227
if (lexer->lookahead != '*') return false;
228+
147229
advance(lexer);
148230

149231
bool after_star = false;

0 commit comments

Comments
 (0)