Skip to content

Commit 0b7801a

Browse files
authored
Fix tokenization of strings and add support for single quotes (#13)
1 parent 6f591a2 commit 0b7801a

File tree

1 file changed

+33
-15
lines changed

1 file changed

+33
-15
lines changed

src/tokenizer.ts

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@
4040
IN THE SOFTWARE.
4141
* */
4242

43-
import {TokenType} from "./tokens";
44-
import {TokenizerErrors} from "./errors";
43+
import { TokenType } from "./tokens";
44+
import { TokenizerErrors } from "./errors";
4545

4646
export class Token {
4747
type: TokenType;
@@ -175,6 +175,13 @@ export class Tokenizer {
175175
this.tokens.push(new Token(type, lexeme, line, col, this.current - lexeme.length))
176176
}
177177

178+
// Emits a token for a quoted string literal that has just been scanned.
// The scanned span [this.start, this.current) is expected to include both the
// opening and the closing quote; only the text between them is kept as the lexeme.
//
// type: token type to record for the literal.
private addStringToken(type: TokenType) {
    const line = this.line;
    const col = this.col;
    // Strip the opening and closing quote from the scanned span.
    const lexeme = this.source.slice(this.start + 1, this.current - 1);
    // Source index of the lexeme's first character, i.e. where the slice above
    // begins. `this.current - lexeme.length` does not account for the closing
    // quote, so it points one character too far (and past the whole token when
    // the string is empty).
    // NOTE(review): assumes the fifth Token argument is the lexeme's offset in
    // the source, as in addToken — confirm against the Token constructor.
    const indexInSource = this.start + 1;
    this.tokens.push(new Token(type, lexeme, line, col, indexInSource));
}
184+
178185
// Checks that the current character matches a pattern. If so the character is consumed, else nothing is consumed.
179186
private matches(pattern: string): boolean {
180187
if (this.isAtEnd()) {
@@ -196,25 +203,25 @@ export class Tokenizer {
196203
// Reports whether `c` begins with an ASCII decimal digit (0-9).
// Only the first character is examined; anything after it is ignored.
private isDigit(c: string): boolean {
    const leadingDecimalDigit = /^[0-9]/;
    return leadingDecimalDigit.test(c);
}
199-
200-
private isHexa(c: string) : boolean {
206+
207+
// Reports whether `c` is exactly one hexadecimal digit (0-9, a-f, A-F).
// The pattern is anchored at both ends, so longer strings are rejected.
private isHexa(c: string): boolean {
    const singleHexDigit = /^[0-9A-F]$/i;
    return singleHexDigit.test(c);
}
203-
204-
private isOcta(c: string) : boolean {
210+
211+
// Reports whether `c` begins with an octal digit (0-7).
// Only the first character is examined; anything after it is ignored.
private isOcta(c: string): boolean {
    const leadingOctalDigit = /^[0-7]/;
    return leadingOctalDigit.test(c);
}
207-
208-
private isBinary(c: string) : boolean {
214+
215+
// Reports whether `c` begins with a binary digit (0 or 1).
// Only the first character is examined; anything after it is ignored.
private isBinary(c: string): boolean {
    const leadingBinaryDigit = /^[0-1]/;
    return leadingBinaryDigit.test(c);
}
211-
218+
212219
// Reports whether `c` may appear in an identifier: an underscore, or anything
// accepted by isAlpha or isDigit.
private isIdentifier(c: string): boolean {
    if (c === '_') {
        return true;
    }
    return this.isAlpha(c) || this.isDigit(c);
}
215-
222+
216223
private baseNumber() {
217-
switch(this.peek()) {
224+
switch (this.peek()) {
218225
case 'x':
219226
this.advance();
220227
if (!this.isHexa(this.peek())) {
@@ -265,9 +272,9 @@ export class Tokenizer {
265272
}
266273
}
267274
}
268-
this.addToken(TokenType.NUMBER);
275+
this.addToken(TokenType.NUMBER);
269276
}
270-
277+
271278
private number() {
272279
while (this.isDigit(this.peek())) {
273280
this.advance();
@@ -292,7 +299,7 @@ export class Tokenizer {
292299
this.advance();
293300
}
294301
}
295-
302+
296303
this.addToken(TokenType.NUMBER);
297304
}
298305

@@ -433,7 +440,18 @@ export class Tokenizer {
433440
}
434441
// Consume closing "
435442
this.advance();
436-
this.addToken(TokenType.STRING);
443+
this.addStringToken(TokenType.STRING);
444+
break;
445+
case '\'':
446+
while (this.peek() != '\'' && this.peek() != '\n' && !this.isAtEnd()) {
447+
this.advance();
448+
}
449+
if (this.peek() === '\n' || this.isAtEnd()) {
450+
throw new TokenizerErrors.UnterminatedStringError(this.line, this.col, this.source, this.start, this.current);
451+
}
452+
// Consume closing '
453+
this.advance();
454+
this.addStringToken(TokenType.STRING);
437455
break;
438456
// Number... I wish JS had match statements :(
439457
case '0':

0 commit comments

Comments
 (0)