@@ -8,13 +8,12 @@ import (
8
8
)
9
9
10
10
func Lex (source file.Source ) ([]Token , error ) {
11
+ raw := source .String ()
11
12
l := & lexer {
12
- source : []rune (source .String ()),
13
+ raw : raw ,
14
+ runes : []rune (raw ),
13
15
tokens : make ([]Token , 0 ),
14
- start : 0 ,
15
- end : 0 ,
16
16
}
17
- l .commit ()
18
17
19
18
for state := root ; state != nil ; {
20
19
state = state (l )
@@ -28,10 +27,16 @@ func Lex(source file.Source) ([]Token, error) {
28
27
}
29
28
30
29
// lexer holds the scanning state used by Lex.
// (Diff view: lines marked '-' are the removed version, '+' the replacement.)
//
// After this change the input is kept twice: raw is the string returned by
// source.String() and runes is []rune(raw); both are set in Lex.
// tokens collects emitted tokens, err records the first error reported,
// start and end are the bounds of the token currently being scanned, and
// eof is set by next when it is called with end.rune at len(runes).
type lexer struct {
31
- source []rune
30
+ raw string
31
+ runes []rune
32
32
tokens []Token
33
- start , end int
34
33
err * file.Error
34
+ start , end pos
35
+ eof bool
36
+ }
37
+
38
// pos is a position in the input expressed as two offsets.
// NOTE(review): presumably byte indexes lexer.raw and rune indexes
// lexer.runes; only the rune offset is read or written in the code visible
// here — confirm where byte is maintained.
+ type pos struct {
39
+ byte , rune int
35
40
}
36
41
37
42
// eof is the sentinel rune that next returns once the input is exhausted.
// It is negative, so it cannot collide with a real code point.
const eof rune = - 1
@@ -41,12 +46,12 @@ func (l *lexer) commit() {
41
46
}
42
47
43
48
// next returns the rune at the current end position and advances end.rune
// by one. When end.rune has already reached len(runes) it returns the eof
// sentinel instead.
//
// Behavior change in this diff: the removed version incremented end on every
// call, including at end of input; the added version leaves end.rune where
// it is and records the condition in the eof flag.
// NOTE(review): only the rune offset is advanced here; end.byte is not
// updated in the visible code — confirm whether it is tracked elsewhere.
func (l * lexer ) next () rune {
44
- if l .end >= len (l .source ) {
45
- l .end ++
49
+ if l .end . rune >= len (l .runes ) {
50
+ l .eof = true
46
51
return eof
47
52
}
48
- r := l .source [l .end ]
49
- l .end ++
53
+ r := l .runes [l .end . rune ]
54
+ l .end . rune ++
50
55
return r
51
56
}
52
57
@@ -57,7 +62,11 @@ func (l *lexer) peek() rune {
57
62
}
58
63
59
64
// backup steps the scanner back by one next call.
//
// If the eof flag is set it only clears the flag: next sets eof without
// moving end.rune, so there is no offset to rewind. Otherwise it decrements
// end.rune. The removed version decremented end unconditionally.
// NOTE(review): end.byte is not adjusted here — confirm whether it needs to be.
func (l * lexer ) backup () {
60
- l .end --
65
+ if l .eof {
66
+ l .eof = false
67
+ } else {
68
+ l .end .rune --
69
+ }
61
70
}
62
71
63
72
func (l * lexer ) emit (t Kind ) {
@@ -66,19 +75,19 @@ func (l *lexer) emit(t Kind) {
66
75
67
76
// emitValue appends a Token of kind t carrying value to l.tokens and then
// calls commit.
//
// The token's Location is reported in rune offsets: From is start.rune and
// To is end.rune (the removed version passed the plain int start/end).
// NOTE(review): commit is defined outside this view; presumably it moves
// start up to end for the next token — verify.
func (l * lexer ) emitValue (t Kind , value string ) {
68
77
l .tokens = append (l .tokens , Token {
69
- Location : file.Location {From : l .start , To : l .end },
78
+ Location : file.Location {From : l .start . rune , To : l .end . rune },
70
79
Kind : t ,
71
80
Value : value ,
72
81
})
73
82
l .commit ()
74
83
}
75
84
76
85
func (l * lexer ) emitEOF () {
77
- from := l .end - 2
86
+ from := l .end . rune - 1
78
87
if from < 0 {
79
88
from = 0
80
89
}
81
- to := l .end - 1
90
+ to := l .end . rune - 0
82
91
if to < 0 {
83
92
to = 0
84
93
}
@@ -95,10 +104,10 @@ func (l *lexer) skip() {
95
104
96
105
// word returns the text of the token currently being scanned, i.e. the
// runes between start.rune and end.rune converted to a string.
//
// If either offset is greater than len(runes) it returns the placeholder
// "__invalid__" instead of slicing.
// NOTE(review): the guard does not cover start.rune > end.rune, which would
// still panic in the slice expression below — confirm that cannot happen.
func (l * lexer ) word () string {
97
106
// TODO: boundary check is NOT needed here, but for some reason CI fuzz tests are failing.
98
- if l .start > len (l .source ) || l .end > len (l .source ) {
107
+ if l .start . rune > len (l .runes ) || l .end . rune > len (l .runes ) {
99
108
return "__invalid__"
100
109
}
101
- return string (l .source [l .start :l .end ])
110
+ return string (l .runes [l .start . rune :l .end . rune ])
102
111
}
103
112
104
113
func (l * lexer ) accept (valid string ) bool {
@@ -144,10 +153,14 @@ func (l *lexer) acceptWord(word string) bool {
144
153
145
154
func (l * lexer ) error (format string , args ... any ) stateFn {
146
155
if l .err == nil { // show first error
156
+ end := l .end .rune
157
+ if l .eof {
158
+ end ++
159
+ }
147
160
l .err = & file.Error {
148
161
Location : file.Location {
149
- From : l . end - 1 ,
150
- To : l . end ,
162
+ From : end - 1 ,
163
+ To : end ,
151
164
},
152
165
Message : fmt .Sprintf (format , args ... ),
153
166
}
@@ -225,6 +238,6 @@ func (l *lexer) scanRawString(quote rune) (n int) {
225
238
ch = l .next ()
226
239
n ++
227
240
}
228
- l .emitValue (String , string (l .source [l .start + 1 :l .end - 1 ]))
241
+ l .emitValue (String , string (l .runes [l .start . rune + 1 :l .end . rune - 1 ]))
229
242
return
230
243
}
0 commit comments