From 19d6fcbfe33e6917d9d2ac26e51b414f03ee6df1 Mon Sep 17 00:00:00 2001 From: jackschu Date: Sun, 30 Jun 2024 16:38:44 -0400 Subject: [PATCH 1/9] only consider block comments to cause auto semis if they've passed a newline --- src/scanner.c | 16 ++++++++++++---- test/corpus/semicolon_insertion.txt | 4 ++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index d429fcba..0662144b 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -47,7 +47,7 @@ static bool scan_template_chars(TSLexer *lexer) { } } -static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) { +static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool *saw_block_newline) { for (;;) { while (iswspace(lexer->lookahead)) { skip(lexer); @@ -73,6 +73,10 @@ static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) *scanned_comment = true; break; } + } else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || + lexer->lookahead == 0x2029) { + *saw_block_newline = true; + skip(lexer); } else { skip(lexer); } @@ -90,17 +94,19 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo lexer->result_symbol = AUTOMATIC_SEMICOLON; lexer->mark_end(lexer); + bool saw_comment_newline = false; for (;;) { if (lexer->lookahead == 0) { return true; } if (lexer->lookahead == '/') { - if (!scan_whitespace_and_comments(lexer, scanned_comment)) { + if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_comment_newline)) { return false; } + if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { - return true; + return saw_comment_newline; } } @@ -125,7 +131,9 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo skip(lexer); - if (!scan_whitespace_and_comments(lexer, scanned_comment)) { + + + if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_comment_newline)) { return false; } diff --git 
a/test/corpus/semicolon_insertion.txt b/test/corpus/semicolon_insertion.txt index d47e3386..a680ee2b 100644 --- a/test/corpus/semicolon_insertion.txt +++ b/test/corpus/semicolon_insertion.txt @@ -276,8 +276,8 @@ let d (program (lexical_declaration - (variable_declarator (identifier))) - (comment) + (variable_declarator (identifier)) + (comment)) (comment) (lexical_declaration (variable_declarator (identifier)) From 69556fc8a8037c070421e5834b308d27ff5c494b Mon Sep 17 00:00:00 2001 From: jackschu Date: Sun, 30 Jun 2024 16:47:44 -0400 Subject: [PATCH 2/9] move to returning 'did scan single-line block comment' --- src/scanner.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 0662144b..319b4ef7 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -47,7 +47,8 @@ static bool scan_template_chars(TSLexer *lexer) { } } -static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool *saw_block_newline) { +static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool *saw_single_block_comment) { + bool saw_block_newline = false; for (;;) { while (iswspace(lexer->lookahead)) { skip(lexer); @@ -70,12 +71,13 @@ static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, skip(lexer); if (lexer->lookahead == '/') { skip(lexer); + *saw_single_block_comment = !saw_block_newline; *scanned_comment = true; break; } } else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { - *saw_block_newline = true; + saw_block_newline = true; skip(lexer); } else { skip(lexer); @@ -94,19 +96,19 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo lexer->result_symbol = AUTOMATIC_SEMICOLON; lexer->mark_end(lexer); - bool saw_comment_newline = false; + bool saw_single_block_comment = true; for (;;) { if (lexer->lookahead == 0) { return true; } if (lexer->lookahead == '/') { - if 
(!scan_whitespace_and_comments(lexer, scanned_comment, &saw_comment_newline)) { + if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_single_block_comment)) { return false; } if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { - return saw_comment_newline; + return !saw_single_block_comment; } } @@ -133,7 +135,7 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo - if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_comment_newline)) { + if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_single_block_comment)) { return false; } From 2c1ae341e72b18f36444fb993e1e4f48f8ad2a35 Mon Sep 17 00:00:00 2001 From: jackschu Date: Sun, 30 Jun 2024 21:37:08 -0400 Subject: [PATCH 3/9] fix: this should only apply if we see a block comment, restore test file --- src/scanner.c | 5 +++-- test/corpus/semicolon_insertion.txt | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 319b4ef7..0b66e2ff 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -96,13 +96,13 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo lexer->result_symbol = AUTOMATIC_SEMICOLON; lexer->mark_end(lexer); - bool saw_single_block_comment = true; for (;;) { if (lexer->lookahead == 0) { return true; } if (lexer->lookahead == '/') { + bool saw_single_block_comment = false; if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_single_block_comment)) { return false; } @@ -135,7 +135,8 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo - if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_single_block_comment)) { + bool unused = false; + if (!scan_whitespace_and_comments(lexer, scanned_comment, &unused)) { return false; } diff --git a/test/corpus/semicolon_insertion.txt b/test/corpus/semicolon_insertion.txt index a680ee2b..d47e3386 100644 --- a/test/corpus/semicolon_insertion.txt +++ 
b/test/corpus/semicolon_insertion.txt @@ -276,8 +276,8 @@ let d (program (lexical_declaration - (variable_declarator (identifier)) - (comment)) + (variable_declarator (identifier))) + (comment) (comment) (lexical_declaration (variable_declarator (identifier)) From 161458206b893f718aaca13b17d5b3e4283f7bf9 Mon Sep 17 00:00:00 2001 From: jackschu Date: Sun, 30 Jun 2024 23:58:31 -0400 Subject: [PATCH 4/9] turns out the problem was really nuanced, there needs to be a way to decide that the first comment lookahead is not enough to return true or false, introduce an enum to represent this, also we need to stop our walk forward as soon as the comment block closes if we're going to return control flow to the remainder of the scanner --- src/scanner.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 0b66e2ff..8b4ce1eb 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -47,8 +47,18 @@ static bool scan_template_chars(TSLexer *lexer) { } } -static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool *saw_single_block_comment) { +enum WhitespaceResult { + REJECT, + NO_NEWLINE, // Unclear if semicolon will be legal, continue + ACCEPT, // Semicolon is legal due to comment +}; + +/** + * @param consume If false, only consume enough to check if comment indicates semicolon-legality + */ +static enum WhitespaceResult scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool consume) { bool saw_block_newline = false; + for (;;) { while (iswspace(lexer->lookahead)) { skip(lexer); @@ -71,8 +81,12 @@ static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, skip(lexer); if (lexer->lookahead == '/') { skip(lexer); - *saw_single_block_comment = !saw_block_newline; *scanned_comment = true; + + if(lexer->lookahead != '/' && !consume){ + return saw_block_newline ? 
ACCEPT : NO_NEWLINE; + } + break; } } else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || @@ -84,10 +98,10 @@ static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, } } } else { - return false; + return REJECT; } } else { - return true; + return ACCEPT; } } } @@ -101,15 +115,17 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo return true; } + if (lexer->lookahead == '/') { - bool saw_single_block_comment = false; - if (!scan_whitespace_and_comments(lexer, scanned_comment, &saw_single_block_comment)) { + enum WhitespaceResult result = scan_whitespace_and_comments(lexer, scanned_comment, false); + if (result == REJECT) { return false; } - if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { - return !saw_single_block_comment; + if (result == ACCEPT && comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { + return true; } + } if (lexer->lookahead == '}') { @@ -135,8 +151,7 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo - bool unused = false; - if (!scan_whitespace_and_comments(lexer, scanned_comment, &unused)) { + if (scan_whitespace_and_comments(lexer, scanned_comment, true) == REJECT) { return false; } From 2e0f7166c3b0b287aae983ca3c74a3863a314d3f Mon Sep 17 00:00:00 2001 From: jackschu Date: Mon, 1 Jul 2024 00:03:38 -0400 Subject: [PATCH 5/9] add some test cases that had tripped me up --- test/corpus/semicolon_insertion.txt | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/corpus/semicolon_insertion.txt b/test/corpus/semicolon_insertion.txt index d47e3386..b24b410f 100644 --- a/test/corpus/semicolon_insertion.txt +++ b/test/corpus/semicolon_insertion.txt @@ -272,6 +272,12 @@ let b /* comment between declarators */, c let d +let e +/* back to back *//* comments */ + +b +/* interleaved non-semi-insertion */ + .c --- (program @@ -286,4 +292,9 @@ let d (comment) (comment) 
(comment) - (lexical_declaration (variable_declarator (identifier)))) + (lexical_declaration (variable_declarator (identifier))) + (lexical_declaration (variable_declarator (identifier))) + (comment) + (comment) + (expression_statement + (member_expression (identifier) (comment) (property_identifier)))) From 6ecd457afd42c2db1773c980c76fcf7c3c3b0583 Mon Sep 17 00:00:00 2001 From: jackschu Date: Mon, 1 Jul 2024 00:07:15 -0400 Subject: [PATCH 6/9] add the motivating error-case --- test/corpus/semicolon_insertion.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/corpus/semicolon_insertion.txt b/test/corpus/semicolon_insertion.txt index b24b410f..75cca226 100644 --- a/test/corpus/semicolon_insertion.txt +++ b/test/corpus/semicolon_insertion.txt @@ -275,6 +275,10 @@ let d let e /* back to back *//* comments */ +class C { + method/*comment*/() {} +} + b /* interleaved non-semi-insertion */ .c @@ -296,5 +300,10 @@ b (lexical_declaration (variable_declarator (identifier))) (comment) (comment) + (class_declaration (identifier) (class_body (method_definition + (property_identifier) + (comment) + (formal_parameters) + (statement_block)))) (expression_statement (member_expression (identifier) (comment) (property_identifier)))) From 04fd12297a1db8b29f2d0a44aa53c405cf776059 Mon Sep 17 00:00:00 2001 From: jackschu Date: Mon, 1 Jul 2024 00:15:58 -0400 Subject: [PATCH 7/9] update enum comments --- src/scanner.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 8b4ce1eb..6156e2c0 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -48,9 +48,9 @@ static bool scan_template_chars(TSLexer *lexer) { } enum WhitespaceResult { - REJECT, + REJECT, // Semicolon is illegal, ie a syntax error occurred NO_NEWLINE, // Unclear if semicolon will be legal, continue - ACCEPT, // Semicolon is legal due to comment + ACCEPT, // Semicolon is legal, assuming a comment was encountered }; /** From 
ff980db7fe9fffdc5f77fc6788762630c73af518 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 4 Jul 2024 21:50:25 -0400 Subject: [PATCH 8/9] formatting --- src/scanner.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 6156e2c0..77ecd61e 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -48,9 +48,9 @@ static bool scan_template_chars(TSLexer *lexer) { } enum WhitespaceResult { - REJECT, // Semicolon is illegal, ie a syntax error occurred + REJECT, // Semicolon is illegal, ie a syntax error occurred NO_NEWLINE, // Unclear if semicolon will be legal, continue - ACCEPT, // Semicolon is legal, assuming a comment was encountered + ACCEPT, // Semicolon is legal, assuming a comment was encountered }; /** @@ -83,14 +83,13 @@ static enum WhitespaceResult scan_whitespace_and_comments(TSLexer *lexer, bool * skip(lexer); *scanned_comment = true; - if(lexer->lookahead != '/' && !consume){ + if (lexer->lookahead != '/' && !consume) { return saw_block_newline ? 
ACCEPT : NO_NEWLINE; } break; } - } else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || - lexer->lookahead == 0x2029) { + } else if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) { saw_block_newline = true; skip(lexer); } else { @@ -115,7 +114,6 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo return true; } - if (lexer->lookahead == '/') { enum WhitespaceResult result = scan_whitespace_and_comments(lexer, scanned_comment, false); if (result == REJECT) { @@ -125,7 +123,6 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo if (result == ACCEPT && comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') { return true; } - } if (lexer->lookahead == '}') { @@ -149,8 +146,6 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo skip(lexer); - - if (scan_whitespace_and_comments(lexer, scanned_comment, true) == REJECT) { return false; } From 3c076173877e236a1e74514f78724a65a40b6df1 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Thu, 4 Jul 2024 21:54:04 -0400 Subject: [PATCH 9/9] typedef over forward decl --- src/scanner.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 77ecd61e..57d2af8f 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -47,16 +47,16 @@ static bool scan_template_chars(TSLexer *lexer) { } } -enum WhitespaceResult { +typedef enum { REJECT, // Semicolon is illegal, ie a syntax error occurred NO_NEWLINE, // Unclear if semicolon will be legal, continue ACCEPT, // Semicolon is legal, assuming a comment was encountered -}; +} WhitespaceResult; /** * @param consume If false, only consume enough to check if comment indicates semicolon-legality */ -static enum WhitespaceResult scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment, bool consume) { +static WhitespaceResult scan_whitespace_and_comments(TSLexer *lexer, bool 
*scanned_comment, bool consume) { bool saw_block_newline = false; for (;;) { @@ -115,7 +115,7 @@ static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, boo } if (lexer->lookahead == '/') { - enum WhitespaceResult result = scan_whitespace_and_comments(lexer, scanned_comment, false); + WhitespaceResult result = scan_whitespace_and_comments(lexer, scanned_comment, false); if (result == REJECT) { return false; }