@@ -85,6 +85,7 @@ const specialIdentifiers = new Map([
85
85
[ "else" , TokenType . ELSE ] ,
86
86
[ "in" , TokenType . IN ] ,
87
87
] ) ;
88
+
88
89
/** Token types of all entries in `specialIdentifiers`, in the map's insertion order. */
export const SPECIAL_IDENTIFIER_TOKENS = Array.from(specialIdentifiers, ([, tokenType]) => tokenType);
89
90
90
91
export class Tokenizer {
@@ -94,8 +95,7 @@ export class Tokenizer {
94
95
private current : number ;
95
96
private line : number ;
96
97
private col : number ;
97
- private prevLineLeadingWhiteSpace : number ;
98
- private currLineLeadingWhiteSpace : number ;
98
+ private readonly indentStack : number [ ] ;
99
99
private specialIdentifiers : Map < string , TokenType > ;
100
100
private forbiddenIdentifiers : Map < string , TokenType > ;
101
101
// forbiddenOperators: Set<TokenType>;
@@ -106,8 +106,7 @@ export class Tokenizer {
106
106
this . current = 0 ;
107
107
this . line = 0 ;
108
108
this . col = 0 ;
109
- this . prevLineLeadingWhiteSpace = 0 ;
110
- this . currLineLeadingWhiteSpace = 0 ;
109
+ this . indentStack = [ 0 ] ;
111
110
this . specialIdentifiers = specialIdentifiers ;
112
111
// Not used by us, but should be kept reserved as per Python spec
113
112
this . forbiddenIdentifiers = new Map ( [
@@ -254,7 +253,7 @@ export class Tokenizer {
254
253
//// SPECIAL MARKERS
255
254
// Comment -- advance to end of line.
256
255
case '#' :
257
- while ( this . peek ( ) != '\n' && ! this . isAtEnd ( ) ) {
256
// Skip the comment body. Stop at either line terminator (LF or CR) so the
// newline handling still sees the end of the line; the two checks must be
// joined with && -- with || the condition is always true and the comment
// would swallow the rest of the input.
while (this.peek() !== '\n' && this.peek() !== '\r' && !this.isAtEnd()) {
    this.advance();
}
260
259
break ;
@@ -275,40 +274,62 @@ export class Tokenizer {
275
274
this . addToken ( TokenType . NEWLINE ) ;
276
275
this . line += 1 ;
277
276
this . col = 0 ;
278
- // @TODO fix me
279
- // // Avoid lines that are completely empty.
280
- // if (this.peek() === '\n' || this.peek() === '\r') {
281
- // this.advance();
282
- // if (this.peek() === '\n') {
283
- // this.advance();
284
- // }
285
- // this.addToken(TokenType.NEWLINE);
286
- // break;
287
- // }
288
- this . prevLineLeadingWhiteSpace = this . currLineLeadingWhiteSpace ;
289
- this . currLineLeadingWhiteSpace = 0 ;
277
+ let accLeadingWhiteSpace = 0 ;
290
278
// Detect significant whitespace
291
279
while ( this . peek ( ) === " " && ! this . isAtEnd ( ) ) {
292
- this . currLineLeadingWhiteSpace += 1 ;
280
+ accLeadingWhiteSpace += 1 ;
293
281
// Consume the rest of the line's leading whitespace.
294
282
this . advance ( ) ;
295
283
}
296
- if ( this . currLineLeadingWhiteSpace > this . prevLineLeadingWhiteSpace ) {
297
- if ( this . currLineLeadingWhiteSpace % 4 !== 0 ) {
298
- throw new TokenizerErrors . NonFourIndentError ( this . line , this . col , this . source , this . current ) ;
284
+ // The following block handles things like
285
+ /*
286
+ def foo():
287
+ pass
288
+ <---- this newline should be zapped
289
+ pass <---- this should be part of the block
290
+ */
291
+ while ( ( this . peek ( ) === "\n" || this . peek ( ) === "\r" ) && ! this . isAtEnd ( ) ) {
292
+ // Handle \r\n on Windows
293
+ if ( this . peek ( ) === "\r" ) {
294
+ this . advance ( ) ;
295
+ if ( this . peek ( ) === "\n" ) {
296
+ this . advance ( ) ;
297
+ }
298
+ } else {
299
+ this . advance ( ) ;
300
+ }
301
+ this . line += 1 ;
302
+ this . col = 0 ;
303
+ accLeadingWhiteSpace = 0 ;
304
+ // Detect significant whitespace
305
+ while ( this . peek ( ) === " " && ! this . isAtEnd ( ) ) {
306
+ accLeadingWhiteSpace += 1 ;
307
+ // Consume the rest of the line's leading whitespace.
308
+ this . advance ( ) ;
299
309
}
300
- const indents = Math . floor ( ( this . currLineLeadingWhiteSpace - this . prevLineLeadingWhiteSpace ) / 4 ) ;
310
+ }
311
+ if ( accLeadingWhiteSpace % 4 !== 0 ) {
312
+ throw new TokenizerErrors . NonFourIndentError ( this . line , this . col , this . source , this . current ) ;
313
+ }
314
+ const tos = this . indentStack [ this . indentStack . length - 1 ] ;
315
+ if ( accLeadingWhiteSpace > tos ) {
316
+ this . indentStack . push ( accLeadingWhiteSpace ) ;
317
+ const indents = Math . floor ( ( accLeadingWhiteSpace - tos ) / 4 ) ;
301
318
for ( let i = 0 ; i < indents ; ++ i ) {
302
319
this . addToken ( TokenType . INDENT ) ;
303
320
}
304
- break ;
305
- }
306
- if ( this . currLineLeadingWhiteSpace < this . prevLineLeadingWhiteSpace ) {
307
- const indents = Math . floor ( ( this . prevLineLeadingWhiteSpace - this . currLineLeadingWhiteSpace ) / 4 ) ;
321
+ } else if ( accLeadingWhiteSpace < tos ) {
322
// Dedent: discard every recorded level deeper than the current line.
// Popping a single entry is not enough when one line closes several blocks
// at once; stale entries left on the stack would trigger spurious DEDENT
// tokens on a later line or at end of input.
while (
    this.indentStack.length > 0 &&
    this.indentStack[this.indentStack.length - 1] > accLeadingWhiteSpace
) {
    this.indentStack.pop();
}
const enclosing = this.indentStack[this.indentStack.length - 1];
if (enclosing === undefined) {
    // The base level 0 is never deeper than a line's indentation, so the
    // stack can only be empty here if it was corrupted elsewhere.
    throw new TokenizerErrors.InconsistentIndentError(this.line, this.col, this.source, this.current);
}
if (enclosing < accLeadingWhiteSpace) {
    // The line sits between two recorded levels (e.g. back to 4 after a
    // single jump from 0 to 8). Record it so the top of the stack always
    // equals the current indentation; otherwise the next line at this
    // width would be mistaken for a fresh indent.
    this.indentStack.push(accLeadingWhiteSpace);
}
// One DEDENT per four columns, mirroring how INDENT tokens are emitted.
const dedents = Math.floor((tos - accLeadingWhiteSpace) / 4);
for (let i = 0; i < dedents; ++i) {
    this.addToken(TokenType.DEDENT);
}
311
- break ;
312
333
}
313
334
break ;
314
335
// String
@@ -420,6 +441,11 @@ export class Tokenizer {
420
441
this . start = this . current ;
421
442
this . scanToken ( ) ;
422
443
}
444
// Unravel the indent stack: close every block that is still open at end of input.
// A single stack entry can stand for several INDENT tokens (one per four columns
// between it and the entry below), so emit the matching number of DEDENT tokens
// rather than exactly one per entry.
while (this.indentStack.length > 1) {
    const closed = this.indentStack.pop();
    const enclosing = this.indentStack[this.indentStack.length - 1];
    if (closed === undefined || enclosing === undefined) {
        // Unreachable while length > 1; present only to narrow the types.
        break;
    }
    const dedents = Math.floor((closed - enclosing) / 4);
    for (let i = 0; i < dedents; ++i) {
        this.addToken(TokenType.DEDENT);
    }
}
423
449
this . tokens . push ( new Token ( TokenType . ENDMARKER , "" , this . line , this . col , this . current ) ) ;
424
450
return this . tokens
425
451
}
0 commit comments