Fix issue #8 (#16)

JJtan2002 · web-flow · commit 215754aa9162 · 2024-03-17T21:29:36.000+08:00
* Fix issue8.

* Cleaning up code

* Reduced nested conditionals

* Update tokenizer.ts

* Fixed typo

* Fixed typos
diff --git a/src/index.ts b/src/index.ts
@@ -129,15 +129,15 @@
 /* Use as a command line script */
 /* npm run start:dev -- test.py */
 
-import {Tokenizer} from "./tokenizer";
-import {Parser} from "./parser";
-import {Translator} from "./translator";
-import {Program} from "estree";
-import {Resolver} from "./resolver";
+import { Tokenizer } from "./tokenizer";
+import { Parser } from "./parser";
+import { Translator } from "./translator";
+import { Program } from "estree";
+import { Resolver } from "./resolver";
 
 export function parsePythonToEstreeAst(code: string,
-                                       variant: number = 1,
-                                       doValidate: boolean = false) : Program {
+    variant: number = 1,
+    doValidate: boolean = false): Program {
     const script = code + '\n'
     const tokenizer = new Tokenizer(script)
     const tokens = tokenizer.scanEverything()
@@ -152,7 +152,6 @@ export function parsePythonToEstreeAst(code: string,
 
 export * from './errors';
 
-
 // import {ParserErrors, ResolverErrors, TokenizerErrors} from "./errors";
 // import fs from "fs";
 // const BaseParserError = ParserErrors.BaseParserError;
diff --git a/src/parser.ts b/src/parser.ts
@@ -39,10 +39,10 @@
     IN THE SOFTWARE.
 **/
 
-import {SPECIAL_IDENTIFIER_TOKENS, Token} from "./tokenizer";
-import {TokenType} from "./tokens";
-import {ExprNS, StmtNS} from "./ast-types";
-import {ParserErrors} from "./errors";
+import { SPECIAL_IDENTIFIER_TOKENS, Token } from "./tokenizer";
+import { TokenType } from "./tokens";
+import { ExprNS, StmtNS } from "./ast-types";
+import { ParserErrors } from "./errors";
 
 type Expr = ExprNS.Expr;
 type Stmt = StmtNS.Stmt;
@@ -156,7 +156,7 @@ export class Parser {
         } else if (this.check(TokenType.NAME, ...PSEUD_NAMES, TokenType.NUMBER,
             TokenType.PASS, TokenType.BREAK, TokenType.CONTINUE,
             TokenType.RETURN, TokenType.FROM, TokenType.GLOBAL, TokenType.NONLOCAL,
-            TokenType.ASSERT, TokenType.LPAR, ...SPECIAL_IDENTIFIER_TOKENS)) {
+            TokenType.ASSERT, TokenType.LPAR, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) {
             return this.simple_stmt();
         }
         const startToken = this.peek();
@@ -165,7 +165,7 @@ export class Parser {
             this.parse_invalid(startToken, endToken);
         } catch (e) {
             if (e instanceof ParserErrors.BaseParserError) {
-                throw(e)
+                throw (e)
             }
         }
         throw new ParserErrors.GenericUnexpectedSyntaxError(startToken.line, startToken.col, this.source,
@@ -255,7 +255,7 @@ export class Parser {
             res = new StmtNS.NonLocal(startToken, startToken, this.advance());
         } else if (this.match(TokenType.ASSERT)) {
             res = new StmtNS.Assert(startToken, startToken, this.test());
-        } else if (this.check(TokenType.LPAR, TokenType.NUMBER, ...SPECIAL_IDENTIFIER_TOKENS)) {
+        } else if (this.check(TokenType.LPAR, TokenType.NUMBER, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) {
             res = new StmtNS.SimpleExpr(startToken, startToken, this.test());
         } else {
             throw new Error("Unreachable code path");
diff --git a/src/tokenizer.ts b/src/tokenizer.ts
@@ -150,6 +150,9 @@ export class Tokenizer {
 
     private advance() {
         const res = this.source[this.current];
+        if (this.peek() == '\n') {
+            this.line += 1;
+        }
         this.current += 1;
         this.col += 1;
         return res;
@@ -178,10 +181,19 @@ export class Tokenizer {
     private addStringToken(type: TokenType) {
         const line = this.line
         const col = this.col;
+        // Slice from start + 1 to current - 1 so the lexeme excludes the opening
+        // and closing quote characters and holds only the string's contents.
         const lexeme = this.source.slice(this.start + 1, this.current - 1);
         this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length))
     }
 
+    // Pushes a token for a triple-quoted string lying between this.start and this.current.
+    private addMultiLineStringToken(type: TokenType) {
+        // Drop the three opening and the three closing quote characters from the lexeme.
+        const body = this.source.slice(this.start + 3, this.current - 3);
+        const { line, col } = this;
+        this.tokens.push(new Token(type, body, line, col, this.current - body.length));
+    }
     // Checks that the current character matches a pattern. If so the character is consumed, else nothing is consumed.
     private matches(pattern: string): boolean {
         if (this.isAtEnd()) {
@@ -432,26 +444,45 @@ export class Tokenizer {
                 break;
             // String
             case '"':
-                while (this.peek() != '"' && this.peek() != '\n' && !this.isAtEnd()) {
-                    this.advance();
-                }
-                if (this.peek() === '\n' || this.isAtEnd()) {
-                    throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
-                }
-                // Consume closing "
-                this.advance();
-                this.addStringToken(TokenType.STRING);
-                break;
-            case '\'':
-                while (this.peek() != '\'' && this.peek() != '\n' && !this.isAtEnd()) {
+            case "'":
+                // Both quote characters share this branch; `quote` is whichever one
+                // opened the literal, and only that character can close it.
+                const quote = c;
+                if (this.peek() == quote) { // two quotes in a row: empty or triple-quoted string
+                    this.advance(); // second quote found and consumed
+                    if (this.peek() != quote) { // empty string ""
+                        this.addStringToken(TokenType.STRING);
+                        break;
+                    }
+                    this.advance(); // third quote consumed
+                    // Scan for the closing delimiter: three consecutive quote
+                    // characters. One or two quotes in a row are ordinary content
+                    // (e.g. """say "hi" now"""), so the run counter resets whenever a
+                    // non-quote character is consumed. advance() returns the character
+                    // it consumed, which is what the counter inspects.
+                    let closingRun = 0;
+                    while (closingRun < 3 && !this.isAtEnd()) {
+                        closingRun = this.advance() === quote ? closingRun + 1 : 0;
+                    }
+                    if (closingRun < 3) {
+                        // Input ended before the closing triple quote was seen.
+                        throw new TokenizerErrors.UnterminatedStringError(this.line,
+                            this.col, this.source, this.start, this.current);
+                    }
+                    this.addMultiLineStringToken(TokenType.STRING);
+                } else { // other case, single-line string
+                    // A single-line literal must close before the end of the line.
+                    // NOTE: backslash escapes are not recognised here, so \" still ends the literal.
+                    while (this.peek() != quote && this.peek() != '\n' && !this.isAtEnd()) {
+                        this.advance();
+                    }
+                    if (this.peek() === '\n' || this.isAtEnd()) {
+                        throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
+                    }
+                    // Consume the closing quote
                     this.advance();
+                    this.addStringToken(TokenType.STRING);
                 }
-                if (this.peek() === '\n' || this.isAtEnd()) {
-                    throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
-                }
-                // Consume closing '
-                this.advance();
-                this.addStringToken(TokenType.STRING);
                 break;
             // Number... I wish JS had match statements :(
             case '0':