Refactor lexer #653

Merged (7 commits, May 17, 2024)
13 changes: 12 additions & 1 deletion expr_test.go
@@ -1623,7 +1623,10 @@ func TestCompile_exposed_error(t *testing.T) {

b, err := json.Marshal(err)
require.NoError(t, err)
require.Equal(t, `{"Line":1,"Column":2,"Message":"invalid operation: == (mismatched types int and bool)","Snippet":"\n | 1 == true\n | ..^","Prev":null}`, string(b))
require.Equal(t,
`{"from":2,"to":4,"line":1,"column":2,"message":"invalid operation: == (mismatched types int and bool)","snippet":"\n | 1 == true\n | ..^","prev":null}`,
string(b),
)
}

func TestAsBool_exposed_error(t *testing.T) {
@@ -2667,3 +2670,11 @@ func TestIssue_integer_truncated_by_compiler(t *testing.T) {
_, err = expr.Compile("fn(256)", expr.Env(env))
require.Error(t, err)
}

func TestExpr_crash(t *testing.T) {
content, err := os.ReadFile("testdata/crash.txt")
require.NoError(t, err)

_, err = expr.Compile(string(content))
require.Error(t, err)
}
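
For context, a minimal sketch (not part of this PR) of how the new error shape surfaces to callers; the expression and the expected keys come straight from the updated test above:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/expr-lang/expr"
)

func main() {
	// Comparing an int with a bool fails type checking at compile time.
	_, err := expr.Compile(`1 == true`)
	if err != nil {
		// The compile error now marshals with lowercase keys and carries
		// rune offsets ("from"/"to") alongside "line" and "column":
		// {"from":2,"to":4,"line":1,"column":2,...}
		b, _ := json.Marshal(err)
		fmt.Println(string(b))
	}
}
```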
28 changes: 21 additions & 7 deletions file/error.go
@@ -8,22 +8,36 @@ import (

type Error struct {
Location
Message string
Snippet string
Prev error
Line int `json:"line"`
Column int `json:"column"`
Message string `json:"message"`
Snippet string `json:"snippet"`
Prev error `json:"prev"`
}

func (e *Error) Error() string {
return e.format()
}

func (e *Error) Bind(source *Source) *Error {
if snippet, found := source.Snippet(e.Location.Line); found {
func (e *Error) Bind(source Source) *Error {
e.Line = 1
for i, r := range source {
if i == e.From {
break
}
if r == '\n' {
e.Line++
e.Column = 0
} else {
e.Column++
}
}
if snippet, found := source.Snippet(e.Line); found {
snippet := strings.Replace(snippet, "\t", " ", -1)
srcLine := "\n | " + snippet
var bytes = []byte(snippet)
var indLine = "\n | "
for i := 0; i < e.Location.Column && len(bytes) > 0; i++ {
for i := 0; i < e.Column && len(bytes) > 0; i++ {
_, sz := utf8.DecodeRune(bytes)
bytes = bytes[sz:]
if sz > 1 {
@@ -54,7 +68,7 @@ func (e *Error) Wrap(err error) {
}

func (e *Error) format() string {
if e.Location.Empty() {
if e.Snippet == "" {
return e.Message
}
return fmt.Sprintf(
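
The new Bind derives line and column lazily from the rune offset. A standalone sketch of that walk (not the library's API), useful for sanity-checking the offsets:

```go
package main

import "fmt"

// lineColumn mirrors the loop in Bind above: it converts a 0-based rune
// offset into a 1-based line and a 0-based column.
func lineColumn(source []rune, from int) (line, col int) {
	line = 1
	for i, r := range source {
		if i == from {
			break
		}
		if r == '\n' {
			line++
			col = 0
		} else {
			col++
		}
	}
	return line, col
}

func main() {
	// Offset 2 in "1 == true" is the first '=' of "==",
	// matching {"line":1,"column":2} in the updated expr_test.go.
	fmt.Println(lineColumn([]rune("1 == true"), 2)) // 1 2
}
```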
8 changes: 2 additions & 6 deletions file/location.go
@@ -1,10 +1,6 @@
package file

type Location struct {
Line int // The 1-based line of the location.
Column int // The 0-based column number of the location.
}

func (l Location) Empty() bool {
return l.Column == 0 && l.Line == 0
From int `json:"from"`
To int `json:"to"`
}
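
Location now stores rune offsets only. To make the new fields concrete, an illustration derived from the test at the top of this PR (not code from the change itself):

```go
package main

import (
	"fmt"

	"github.com/expr-lang/expr/file"
)

func main() {
	// In "1 == true" the "==" operator spans rune indices 2..4:
	// From points at the first rune of the range, To is one past the last.
	loc := file.Location{From: 2, To: 4}
	fmt.Println(string([]rune("1 == true")[loc.From:loc.To])) // "=="
}
```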
73 changes: 21 additions & 52 deletions file/source.go
@@ -1,78 +1,47 @@
package file

import (
"encoding/json"
"strings"
"unicode/utf8"
)

type Source struct {
contents []rune
lineOffsets []int32
}

func NewSource(contents string) *Source {
s := &Source{
contents: []rune(contents),
}
s.updateOffsets()
return s
}

func (s *Source) MarshalJSON() ([]byte, error) {
return json.Marshal(s.contents)
}

func (s *Source) UnmarshalJSON(b []byte) error {
contents := make([]rune, 0)
err := json.Unmarshal(b, &contents)
if err != nil {
return err
}
type Source []rune

s.contents = contents
s.updateOffsets()
return nil
func NewSource(contents string) Source {
return []rune(contents)
}

func (s *Source) Content() string {
return string(s.contents)
func (s Source) String() string {
return string(s)
}

func (s *Source) Snippet(line int) (string, bool) {
func (s Source) Snippet(line int) (string, bool) {
if s == nil {
return "", false
}
charStart, found := s.findLineOffset(line)
if !found || len(s.contents) == 0 {
lines := strings.Split(string(s), "\n")
lineOffsets := make([]int, len(lines))
var offset int
for i, line := range lines {
offset = offset + utf8.RuneCountInString(line) + 1
lineOffsets[i] = offset
}
charStart, found := getLineOffset(lineOffsets, line)
if !found || len(s) == 0 {
return "", false
}
charEnd, found := s.findLineOffset(line + 1)
charEnd, found := getLineOffset(lineOffsets, line+1)
if found {
return string(s.contents[charStart : charEnd-1]), true
}
return string(s.contents[charStart:]), true
}

// updateOffsets compute line offsets up front as they are referred to frequently.
func (s *Source) updateOffsets() {
lines := strings.Split(string(s.contents), "\n")
offsets := make([]int32, len(lines))
var offset int32
for i, line := range lines {
offset = offset + int32(utf8.RuneCountInString(line)) + 1
offsets[int32(i)] = offset
return string(s[charStart : charEnd-1]), true
}
s.lineOffsets = offsets
return string(s[charStart:]), true
}

// findLineOffset returns the offset where the (1-indexed) line begins,
// or false if line doesn't exist.
func (s *Source) findLineOffset(line int) (int32, bool) {
func getLineOffset(lineOffsets []int, line int) (int, bool) {
if line == 1 {
return 0, true
} else if line > 1 && line <= len(s.lineOffsets) {
offset := s.lineOffsets[line-2]
} else if line > 1 && line <= len(lineOffsets) {
offset := lineOffsets[line-2]
return offset, true
}
return -1, false
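
A short usage sketch for the slimmed-down Source type (illustrative only; the behaviour follows the code above):

```go
package main

import (
	"fmt"

	"github.com/expr-lang/expr/file"
)

func main() {
	src := file.NewSource("let x = 1\nx + y")

	// Snippet returns the requested 1-based line, or false if it does not exist.
	line, ok := src.Snippet(2)
	fmt.Printf("%q %v\n", line, ok) // "x + y" true

	_, ok = src.Snippet(3)
	fmt.Println(ok) // false
}
```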
15 changes: 0 additions & 15 deletions file/source_test.go
@@ -1,10 +1,7 @@
package file

import (
"encoding/json"
"testing"

"github.com/expr-lang/expr/internal/testify/assert"
)

const (
@@ -55,15 +52,3 @@ func TestStringSource_SnippetSingleLine(t *testing.T) {
t.Errorf(unexpectedSnippet, t.Name(), str2, "")
}
}

func TestStringSource_MarshalJSON(t *testing.T) {
source := NewSource("hello, world")
encoded, err := json.Marshal(source)
assert.NoError(t, err)
assert.Equal(t, `[104,101,108,108,111,44,32,119,111,114,108,100]`, string(encoded))

decoded := &Source{}
err = json.Unmarshal(encoded, decoded)
assert.NoError(t, err)
assert.Equal(t, source.Content(), decoded.Content())
}
91 changes: 43 additions & 48 deletions parser/lexer/lexer.go
@@ -3,20 +3,18 @@ package lexer
import (
"fmt"
"strings"
"unicode/utf8"

"github.com/expr-lang/expr/file"
)

func Lex(source *file.Source) ([]Token, error) {
func Lex(source file.Source) ([]Token, error) {
l := &lexer{
input: source.Content(),
source: source,
tokens: make([]Token, 0),
start: 0,
end: 0,
}

l.loc = file.Location{Line: 1, Column: 0}
l.prev = l.loc
l.startLoc = l.loc
l.commit()

for state := root; state != nil; {
state = state(l)
@@ -30,34 +28,25 @@ func Lex(source *file.Source) ([]Token, error) {
}

type lexer struct {
input string
source file.Source
tokens []Token
start, end int // current position in input
width int // last rune width
startLoc file.Location // start location
prev, loc file.Location // prev location of end location, end location
start, end int
err *file.Error
}

const eof rune = -1

func (l *lexer) commit() {
l.start = l.end
}

func (l *lexer) next() rune {
if l.end >= len(l.input) {
l.width = 0
if l.end >= len(l.source) {
l.end++
return eof
}
r, w := utf8.DecodeRuneInString(l.input[l.end:])
l.width = w
l.end += w

l.prev = l.loc
if r == '\n' {
l.loc.Line++
l.loc.Column = 0
} else {
l.loc.Column++
}

r := l.source[l.end]
l.end++
return r
}

@@ -68,8 +57,7 @@ func (l *lexer) peek() rune {
}

func (l *lexer) backup() {
l.end -= l.width
l.loc = l.prev
l.end--
}

func (l *lexer) emit(t Kind) {
@@ -78,35 +66,39 @@ func (l *lexer) emit(t Kind) {

func (l *lexer) emitValue(t Kind, value string) {
l.tokens = append(l.tokens, Token{
Location: l.startLoc,
Location: file.Location{From: l.start, To: l.end},
Kind: t,
Value: value,
})
l.start = l.end
l.startLoc = l.loc
l.commit()
}

func (l *lexer) emitEOF() {
from := l.end - 2
if from < 0 {
from = 0
}
to := l.end - 1
if to < 0 {
to = 0
}
l.tokens = append(l.tokens, Token{
Location: l.prev, // Point to previous position for better error messages.
Location: file.Location{From: from, To: to},
Kind: EOF,
})
l.start = l.end
l.startLoc = l.loc
l.commit()
}

func (l *lexer) skip() {
l.start = l.end
l.startLoc = l.loc
l.commit()
}

func (l *lexer) word() string {
return l.input[l.start:l.end]
}

func (l *lexer) ignore() {
l.start = l.end
l.startLoc = l.loc
// TODO: boundary check is NOT needed here, but for some reason CI fuzz tests are failing.
if l.start > len(l.source) || l.end > len(l.source) {
return "__invalid__"
}
return string(l.source[l.start:l.end])
}

func (l *lexer) accept(valid string) bool {
@@ -132,18 +124,18 @@ func (l *lexer) skipSpaces() {
}

func (l *lexer) acceptWord(word string) bool {
pos, loc, prev := l.end, l.loc, l.prev
pos := l.end

l.skipSpaces()

for _, ch := range word {
if l.next() != ch {
l.end, l.loc, l.prev = pos, loc, prev
l.end = pos
return false
}
}
if r := l.peek(); r != ' ' && r != eof {
l.end, l.loc, l.prev = pos, loc, prev
l.end = pos
return false
}

@@ -153,8 +145,11 @@ func (l *lexer) acceptWord(word string) bool {
func (l *lexer) error(format string, args ...any) stateFn {
if l.err == nil { // show first error
l.err = &file.Error{
Location: l.loc,
Message: fmt.Sprintf(format, args...),
Location: file.Location{
From: l.end - 1,
To: l.end,
},
Message: fmt.Sprintf(format, args...),
}
}
return nil
@@ -230,6 +225,6 @@ func (l *lexer) scanRawString(quote rune) (n int) {
ch = l.next()
n++
}
l.emitValue(String, l.input[l.start+1:l.end-1])
l.emitValue(String, string(l.source[l.start+1:l.end-1]))
return
}
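
With the refactor, tokens carry rune offsets instead of precomputed line/column positions. A minimal sketch of driving the lexer directly (assuming Token exposes the file.Location set in emitValue above; the demo program itself is illustrative):

```go
package main

import (
	"fmt"

	"github.com/expr-lang/expr/file"
	"github.com/expr-lang/expr/parser/lexer"
)

func main() {
	src := file.NewSource("1 == true")
	tokens, err := lexer.Lex(src)
	if err != nil {
		panic(err)
	}
	for _, tok := range tokens {
		// Each token's Location now holds rune offsets into the source.
		fmt.Printf("%v %q [%d:%d]\n", tok.Kind, tok.Value, tok.Location.From, tok.Location.To)
	}
}
```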