Skip to content

Commit 215754a

Browse files
authored
Fix issue #8 (#16)
* Fix issue8. * Cleaning up code * Reduced nested conditionals * Update tokenizer.ts * Fixed typo * Fixed typos
1 parent 17c51ee commit 215754a

File tree

3 files changed

+63
-33
lines changed

3 files changed

+63
-33
lines changed

src/index.ts

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,15 @@
129129
/* Use as a command line script */
130130
/* npm run start:dev -- test.py */
131131

132-
import {Tokenizer} from "./tokenizer";
133-
import {Parser} from "./parser";
134-
import {Translator} from "./translator";
135-
import {Program} from "estree";
136-
import {Resolver} from "./resolver";
132+
import { Tokenizer } from "./tokenizer";
133+
import { Parser } from "./parser";
134+
import { Translator } from "./translator";
135+
import { Program } from "estree";
136+
import { Resolver } from "./resolver";
137137

138138
export function parsePythonToEstreeAst(code: string,
139-
variant: number = 1,
140-
doValidate: boolean = false) : Program {
139+
variant: number = 1,
140+
doValidate: boolean = false): Program {
141141
const script = code + '\n'
142142
const tokenizer = new Tokenizer(script)
143143
const tokens = tokenizer.scanEverything()
@@ -152,7 +152,6 @@ export function parsePythonToEstreeAst(code: string,
152152

153153
export * from './errors';
154154

155-
156155
// import {ParserErrors, ResolverErrors, TokenizerErrors} from "./errors";
157156
// import fs from "fs";
158157
// const BaseParserError = ParserErrors.BaseParserError;

src/parser.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@
3939
IN THE SOFTWARE.
4040
**/
4141

42-
import {SPECIAL_IDENTIFIER_TOKENS, Token} from "./tokenizer";
43-
import {TokenType} from "./tokens";
44-
import {ExprNS, StmtNS} from "./ast-types";
45-
import {ParserErrors} from "./errors";
42+
import { SPECIAL_IDENTIFIER_TOKENS, Token } from "./tokenizer";
43+
import { TokenType } from "./tokens";
44+
import { ExprNS, StmtNS } from "./ast-types";
45+
import { ParserErrors } from "./errors";
4646

4747
type Expr = ExprNS.Expr;
4848
type Stmt = StmtNS.Stmt;
@@ -156,7 +156,7 @@ export class Parser {
156156
} else if (this.check(TokenType.NAME, ...PSEUD_NAMES, TokenType.NUMBER,
157157
TokenType.PASS, TokenType.BREAK, TokenType.CONTINUE,
158158
TokenType.RETURN, TokenType.FROM, TokenType.GLOBAL, TokenType.NONLOCAL,
159-
TokenType.ASSERT, TokenType.LPAR, ...SPECIAL_IDENTIFIER_TOKENS)) {
159+
TokenType.ASSERT, TokenType.LPAR, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) {
160160
return this.simple_stmt();
161161
}
162162
const startToken = this.peek();
@@ -165,7 +165,7 @@ export class Parser {
165165
this.parse_invalid(startToken, endToken);
166166
} catch (e) {
167167
if (e instanceof ParserErrors.BaseParserError) {
168-
throw(e)
168+
throw (e)
169169
}
170170
}
171171
throw new ParserErrors.GenericUnexpectedSyntaxError(startToken.line, startToken.col, this.source,
@@ -255,7 +255,7 @@ export class Parser {
255255
res = new StmtNS.NonLocal(startToken, startToken, this.advance());
256256
} else if (this.match(TokenType.ASSERT)) {
257257
res = new StmtNS.Assert(startToken, startToken, this.test());
258-
} else if (this.check(TokenType.LPAR, TokenType.NUMBER, ...SPECIAL_IDENTIFIER_TOKENS)) {
258+
} else if (this.check(TokenType.LPAR, TokenType.NUMBER, TokenType.STRING, ...SPECIAL_IDENTIFIER_TOKENS)) {
259259
res = new StmtNS.SimpleExpr(startToken, startToken, this.test());
260260
} else {
261261
throw new Error("Unreachable code path");

src/tokenizer.ts

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ export class Tokenizer {
150150

151151
private advance() {
152152
const res = this.source[this.current];
153+
if (this.peek() == '\n') {
154+
this.line += 1;
155+
}
153156
this.current += 1;
154157
this.col += 1;
155158
return res;
@@ -178,10 +181,19 @@ export class Tokenizer {
178181
private addStringToken(type: TokenType) {
179182
const line = this.line
180183
const col = this.col;
184+
// Remove starting and ending quotes when slicing
185+
// so that the lexeme contains only the characters between the quotes
181186
const lexeme = this.source.slice(this.start + 1, this.current - 1);
182187
this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length))
183188
}
184189

190+
private addMultiLineStringToken(type: TokenType) {
191+
const line = this.line
192+
const col = this.col;
193+
// Remove the three opening and three closing quotes when slicing
194+
const lexeme = this.source.slice(this.start + 3, this.current - 3);
195+
this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length))
196+
}
185197
// Checks whether the current character matches a pattern. If so, the character is consumed; otherwise nothing is consumed.
186198
private matches(pattern: string): boolean {
187199
if (this.isAtEnd()) {
@@ -432,26 +444,45 @@ export class Tokenizer {
432444
break;
433445
// String
434446
case '"':
435-
while (this.peek() != '"' && this.peek() != '\n' && !this.isAtEnd()) {
436-
this.advance();
437-
}
438-
if (this.peek() === '\n' || this.isAtEnd()) {
439-
throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
440-
}
441-
// Consume closing "
442-
this.advance();
443-
this.addStringToken(TokenType.STRING);
444-
break;
445-
case '\'':
446-
while (this.peek() != '\'' && this.peek() != '\n' && !this.isAtEnd()) {
447+
case "'":
448+
let quote = c;
449+
if (this.peek() == quote) { // handle multi-line string
450+
this.advance(); // second quote found and consumed
451+
if (this.peek() != quote) { // empty string ""
452+
this.addStringToken(TokenType.STRING);
453+
break;
454+
}
455+
this.advance(); // third quote consumed
456+
while (this.peek() != quote && !this.isAtEnd()) {
457+
this.advance(); // advance until ending quote found
458+
}
459+
if (this.isAtEnd()) {
460+
throw new TokenizerErrors.UnterminatedStringError(this.line,
461+
this.col, this.source, this.start, this.current);
462+
}
463+
this.advance(); // consume first ending quote
464+
if (this.peek() != quote) {
465+
throw new TokenizerErrors.UnterminatedStringError(this.line,
466+
this.col, this.source, this.start, this.current);
467+
}
468+
this.advance(); // consume second ending quote
469+
if (this.peek() != quote) {
470+
throw new TokenizerErrors.UnterminatedStringError(this.line,
471+
this.col, this.source, this.start, this.current);
472+
}
473+
this.advance(); // consume third ending quote
474+
this.addMultiLineStringToken(TokenType.STRING);
475+
} else { // other case, single-line string
476+
while (this.peek() != quote && this.peek() != '\n' && !this.isAtEnd()) {
477+
this.advance();
478+
}
479+
if (this.peek() === '\n' || this.isAtEnd()) {
480+
throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
481+
}
482+
// Consume the closing quote (" or ')
447483
this.advance();
484+
this.addStringToken(TokenType.STRING);
448485
}
449-
if (this.peek() === '\n' || this.isAtEnd()) {
450-
throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
451-
}
452-
// Consume closing '
453-
this.advance();
454-
this.addStringToken(TokenType.STRING);
455486
break;
456487
// Number... I wish JS had match statements :(
457488
case '0':

0 commit comments

Comments
 (0)