Skip to content

Commit 1587bd5

Browse files
Handle newlines in blocks
1 parent 2545cc4 commit 1587bd5

File tree

6 files changed

+97
-41
lines changed

6 files changed

+97
-41
lines changed

src/errors.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ export namespace TokenizerErrors {
8484
this.name = "NonFourIndentError";
8585
}
8686
}
87+
88+
/**
 * Tokenizer error for an indent/dedent that is inconsistent with the other
 * indents/dedents seen so far.
 *
 * The message has two lines: the text returned by `getFullLine(source, start)`
 * (presumably the offending source line — confirm against its definition),
 * followed by a hint line whose `^` marker is pushed right with `-` padding.
 */
export class InconsistentIndentError extends BaseTokenizerError {
89+
/**
 * @param line   Line number forwarded to the base error.
 * @param col    Column forwarded to the base error; also reported in the hint
 *               as the current number of spaces.
 * @param source Text passed to `getFullLine` to build the first message line.
 * @param start  Offset passed to `getFullLine` together with `source`.
 */
constructor(line: number, col: number, source: string, start: number) {
90+
let msg = getFullLine(source, start) + "\n";
91+
let hint = `^ This indent/dedent is inconsistent with other indents/dedents. It's currently ${col} spaces.`;
92+
// Left-pad the hint with `col - MAGIC_OFFSET` dashes so the caret is shifted
// towards the column. NOTE(review): MAGIC_OFFSET is defined outside this
// view; its value and meaning are not verifiable here.
hint = hint.padStart(hint.length + col - MAGIC_OFFSET, "-");
93+
super(msg + hint, line, col);
94+
this.name = "InconsistentIndentError";
95+
}
96+
}
8797
export class ForbiddenIdentifierError extends BaseTokenizerError {
8898
constructor(line: number, col: number, source: string, start: number) {
8999
let msg = getFullLine(source, start) + "\n";

src/index.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,6 @@ export function parsePythonToEstreeAst(code: string,
153153
export * from './errors';
154154

155155

156-
// import {Tokenizer} from "./tokenizer";
157-
// import {Parser} from "./parser";
158-
// import {Resolver} from "./resolver";
159-
// import {Translator} from "./translator";
160156
// import {ParserErrors, ResolverErrors, TokenizerErrors} from "./errors";
161157
// import fs from "fs";
162158
// const BaseParserError = ParserErrors.BaseParserError;
@@ -172,8 +168,8 @@ export * from './errors';
172168
// tokenizer.printTokens();
173169
// const parser = new Parser(text, tokens);
174170
// const ast = parser.parse();
175-
// const resolver = new Resolver(text, ast);
176-
// resolver.resolve(ast);
171+
// // const resolver = new Resolver(text, ast);
172+
// // resolver.resolve(ast);
177173
// console.dir(ast, { depth: null });
178174
// const translator = new Translator(text);
179175
// const estreeAst = translator.resolve(ast);

src/tests/regression.test.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import {toEstreeAST} from "./utils";
2+
3+
// Regression suite: each case hands a Python snippet to `toEstreeAST` and makes
// no explicit assertion, so a case passes as long as the call does not throw.
describe('Regression tests for py-slang', () => {
4+
// Issue #2: a `def` followed by statements that are separated by an empty line.
test('Issue #2', () => {
5+
const text = `
6+
def foo():
7+
pass
8+
9+
pass
10+
`;
11+
// Completing without an exception is the whole check.
toEstreeAST(text);
12+
})
13+
// Issue #5: two top-level calls separated by an empty line.
test('Issue #5', () => {
14+
const text = `
15+
print("hi")
16+
17+
print("world")
18+
`;
19+
// Completing without an exception is the whole check.
toEstreeAST(text);
20+
})
21+
})

src/tests/utils.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import {
2-
Expression,
2+
Expression, Program,
33
Statement,
44
} from "estree";
55

@@ -11,11 +11,12 @@ import {StmtNS} from "../ast-types";
1111
import Stmt = StmtNS.Stmt;
1212

1313
export function toPythonAst(text: string): Stmt {
14-
const tok = new Tokenizer(text);
15-
const tokens = tok.scanEverything();
16-
// tok.printTokens();
17-
const ast = (new Parser(text, tokens)).parse();
18-
console.log(ast);
14+
const script = text + '\n'
15+
const tokenizer = new Tokenizer(script)
16+
const tokens = tokenizer.scanEverything()
17+
const pyParser = new Parser(script, tokens)
18+
const ast = pyParser.parse()
19+
// console.dir(ast);
1920
return ast;
2021
}
2122

src/tokenizer.ts

Lines changed: 53 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ const specialIdentifiers = new Map([
8585
["else", TokenType.ELSE],
8686
["in", TokenType.IN],
8787
]);
88+
8889
export const SPECIAL_IDENTIFIER_TOKENS = Array.from(specialIdentifiers.values());
8990

9091
export class Tokenizer {
@@ -94,8 +95,7 @@ export class Tokenizer {
9495
private current: number;
9596
private line: number;
9697
private col: number;
97-
private prevLineLeadingWhiteSpace: number;
98-
private currLineLeadingWhiteSpace: number;
98+
private readonly indentStack: number[];
9999
private specialIdentifiers: Map<string, TokenType>;
100100
private forbiddenIdentifiers: Map<string, TokenType>;
101101
// forbiddenOperators: Set<TokenType>;
@@ -106,8 +106,7 @@ export class Tokenizer {
106106
this.current = 0;
107107
this.line = 0;
108108
this.col = 0;
109-
this.prevLineLeadingWhiteSpace = 0;
110-
this.currLineLeadingWhiteSpace = 0;
109+
this.indentStack = [0];
111110
this.specialIdentifiers = specialIdentifiers;
112111
// Not used by us, but should be kept reserved as per Python spec
113112
this.forbiddenIdentifiers = new Map([
@@ -254,7 +253,7 @@ export class Tokenizer {
254253
//// SPECIAL MARKERS
255254
// Comment -- advance to end of line.
256255
case '#':
257-
while (this.peek() != '\n' && !this.isAtEnd()) {
256+
// Stop at either line terminator. The two tests must be joined with `&&`:
// with `||` one of them is always true, so the loop ran to end of input and a
// comment swallowed every line after it.
while (this.peek() !== '\n' && this.peek() !== '\r' && !this.isAtEnd()) {
258257
this.advance();
259258
}
260259
break;
@@ -275,40 +274,62 @@ export class Tokenizer {
275274
this.addToken(TokenType.NEWLINE);
276275
this.line += 1;
277276
this.col = 0;
278-
// @TODO fix me
279-
// // Avoid lines that are completely empty.
280-
// if (this.peek() === '\n' || this.peek() === '\r') {
281-
// this.advance();
282-
// if (this.peek() === '\n') {
283-
// this.advance();
284-
// }
285-
// this.addToken(TokenType.NEWLINE);
286-
// break;
287-
// }
288-
this.prevLineLeadingWhiteSpace = this.currLineLeadingWhiteSpace;
289-
this.currLineLeadingWhiteSpace = 0;
277+
let accLeadingWhiteSpace = 0;
290278
// Detect significant whitespace
291279
while (this.peek() === " " && !this.isAtEnd()) {
292-
this.currLineLeadingWhiteSpace += 1;
280+
accLeadingWhiteSpace += 1;
293281
// Consume the rest of the line's leading whitespace.
294282
this.advance();
295283
}
296-
if (this.currLineLeadingWhiteSpace > this.prevLineLeadingWhiteSpace) {
297-
if (this.currLineLeadingWhiteSpace % 4 !== 0) {
298-
throw new TokenizerErrors.NonFourIndentError(this.line, this.col, this.source, this.current);
284+
// The following block handles things like
285+
/*
286+
def foo():
287+
pass
288+
<---- this newline should be zapped
289+
pass <---- this should be part of the block
290+
*/
291+
while ((this.peek() === "\n" || this.peek() === "\r") && !this.isAtEnd()) {
292+
// Handle \r\n on Windows
293+
if (this.peek() === "\r") {
294+
this.advance();
295+
if (this.peek() === "\n") {
296+
this.advance();
297+
}
298+
} else {
299+
this.advance();
300+
}
301+
this.line += 1;
302+
this.col = 0;
303+
accLeadingWhiteSpace = 0;
304+
// Detect significant whitespace
305+
while (this.peek() === " " && !this.isAtEnd()) {
306+
accLeadingWhiteSpace += 1;
307+
// Consume the rest of the line's leading whitespace.
308+
this.advance();
299309
}
300-
const indents = Math.floor((this.currLineLeadingWhiteSpace - this.prevLineLeadingWhiteSpace) / 4);
310+
}
311+
if (accLeadingWhiteSpace % 4 !== 0) {
312+
throw new TokenizerErrors.NonFourIndentError(this.line, this.col, this.source, this.current);
313+
}
314+
const tos = this.indentStack[this.indentStack.length-1];
315+
if (accLeadingWhiteSpace > tos) {
316+
this.indentStack.push(accLeadingWhiteSpace);
317+
const indents = Math.floor((accLeadingWhiteSpace - tos) / 4);
301318
for (let i = 0; i < indents; ++i) {
302319
this.addToken(TokenType.INDENT);
303320
}
304-
break;
305-
}
306-
if (this.currLineLeadingWhiteSpace < this.prevLineLeadingWhiteSpace) {
307-
const indents = Math.floor((this.prevLineLeadingWhiteSpace - this.currLineLeadingWhiteSpace) / 4);
321+
} else if (accLeadingWhiteSpace < tos) {
322+
if (this.indentStack.length == 0) {
323+
throw new TokenizerErrors.InconsistentIndentError(this.line, this.col, this.source, this.current);
324+
}
325+
const prev = this.indentStack.pop();
326+
if (prev === undefined || prev === null) {
327+
throw new TokenizerErrors.InconsistentIndentError(this.line, this.col, this.source, this.current);
328+
}
329+
const indents = Math.floor((prev - accLeadingWhiteSpace) / 4);
308330
for (let i = 0; i < indents; ++i) {
309331
this.addToken(TokenType.DEDENT);
310332
}
311-
break;
312333
}
313334
break;
314335
// String
@@ -420,6 +441,11 @@ export class Tokenizer {
420441
this.start = this.current;
421442
this.scanToken();
422443
}
444+
// Unravel the indent stack
445+
while(this.indentStack[this.indentStack.length-1] !== 0) {
446+
this.indentStack.pop();
447+
this.addToken(TokenType.DEDENT);
448+
}
423449
this.tokens.push(new Token(TokenType.ENDMARKER, "", this.line, this.col, this.current));
424450
return this.tokens
425451
}

test.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1-
# import { heart, blue, red, stack } from "rune";
2-
test(1)(2)
1+
def foo():
2+
pass
3+
4+
pass

0 commit comments

Comments
 (0)