Skip to content

Commit 10e4040

Browse files
committed
lexer: add byte and char pos
1 parent f835edd commit 10e4040

File tree

1 file changed

+32 -19 lines changed

1 file changed

+32 -19 lines changed

parser/lexer/lexer.go

Lines changed: 32 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,12 @@ import (
88
)
99

1010
func Lex(source file.Source) ([]Token, error) {
11+
raw := source.String()
1112
l := &lexer{
12-
source: []rune(source.String()),
13+
raw: raw,
14+
runes: []rune(raw),
1315
tokens: make([]Token, 0),
14-
start: 0,
15-
end: 0,
1616
}
17-
l.commit()
1817

1918
for state := root; state != nil; {
2019
state = state(l)
@@ -28,10 +27,16 @@ func Lex(source file.Source) ([]Token, error) {
2827
}
2928

3029
type lexer struct {
31-
source []rune
30+
raw string
31+
runes []rune
3232
tokens []Token
33-
start, end int
3433
err *file.Error
34+
start, end pos
35+
eof bool
36+
}
37+
38+
type pos struct {
39+
byte, rune int
3540
}
3641

3742
const eof rune = -1
@@ -41,12 +46,12 @@ func (l *lexer) commit() {
4146
}
4247

4348
func (l *lexer) next() rune {
44-
if l.end >= len(l.source) {
45-
l.end++
49+
if l.end.rune >= len(l.runes) {
50+
l.eof = true
4651
return eof
4752
}
48-
r := l.source[l.end]
49-
l.end++
53+
r := l.runes[l.end.rune]
54+
l.end.rune++
5055
return r
5156
}
5257

@@ -57,7 +62,11 @@ func (l *lexer) peek() rune {
5762
}
5863

5964
func (l *lexer) backup() {
60-
l.end--
65+
if l.eof {
66+
l.eof = false
67+
} else {
68+
l.end.rune--
69+
}
6170
}
6271

6372
func (l *lexer) emit(t Kind) {
@@ -66,19 +75,19 @@ func (l *lexer) emit(t Kind) {
6675

6776
func (l *lexer) emitValue(t Kind, value string) {
6877
l.tokens = append(l.tokens, Token{
69-
Location: file.Location{From: l.start, To: l.end},
78+
Location: file.Location{From: l.start.rune, To: l.end.rune},
7079
Kind: t,
7180
Value: value,
7281
})
7382
l.commit()
7483
}
7584

7685
func (l *lexer) emitEOF() {
77-
from := l.end - 2
86+
from := l.end.rune - 1
7887
if from < 0 {
7988
from = 0
8089
}
81-
to := l.end - 1
90+
to := l.end.rune - 0
8291
if to < 0 {
8392
to = 0
8493
}
@@ -95,10 +104,10 @@ func (l *lexer) skip() {
95104

96105
func (l *lexer) word() string {
97106
// TODO: boundary check is NOT needed here, but for some reason CI fuzz tests are failing.
98-
if l.start > len(l.source) || l.end > len(l.source) {
107+
if l.start.rune > len(l.runes) || l.end.rune > len(l.runes) {
99108
return "__invalid__"
100109
}
101-
return string(l.source[l.start:l.end])
110+
return string(l.runes[l.start.rune:l.end.rune])
102111
}
103112

104113
func (l *lexer) accept(valid string) bool {
@@ -144,10 +153,14 @@ func (l *lexer) acceptWord(word string) bool {
144153

145154
func (l *lexer) error(format string, args ...any) stateFn {
146155
if l.err == nil { // show first error
156+
end := l.end.rune
157+
if l.eof {
158+
end++
159+
}
147160
l.err = &file.Error{
148161
Location: file.Location{
149-
From: l.end - 1,
150-
To: l.end,
162+
From: end - 1,
163+
To: end,
151164
},
152165
Message: fmt.Sprintf(format, args...),
153166
}
@@ -225,6 +238,6 @@ func (l *lexer) scanRawString(quote rune) (n int) {
225238
ch = l.next()
226239
n++
227240
}
228-
l.emitValue(String, string(l.source[l.start+1:l.end-1]))
241+
l.emitValue(String, string(l.runes[l.start.rune+1:l.end.rune-1]))
229242
return
230243
}

0 commit comments

Comments
 (0)