Skip to content

Commit 21e6d9e

Browse files
committed
Merge branch 'pendo-io-faster-safer-keycomp'
2 parents 4b32de2 + bb9ca39 commit 21e6d9e

File tree

7 files changed

+258
-18
lines changed

7 files changed

+258
-18
lines changed

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
SOURCE = parser.go
22
CONTAINER = jsonparser
33
SOURCE_PATH = /go/src/github.com/buger/jsonparser
4-
BENCHMARK = JsonParser
4+
BENCHMARK = JsonParserSmall
55
BENCHTIME = 5s
66
TEST = .
77

@@ -14,6 +14,9 @@ race:
1414
bench:
1515
docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -benchtime $(BENCHTIME) -v
1616

17+
bench_local:
18+
docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench . $(ARGS) -benchtime $(BENCHTIME) -v
19+
1720
profile:
1821
docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -memprofile mem.mprof -v
1922
docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER) go test $(LDFLAGS) -test.benchmem -bench $(BENCHMARK) ./benchmark/ $(ARGS) -cpuprofile cpu.out -v

bytes.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package jsonparser
2+
3+
// parseInt converts a base-10 ASCII integer, with an optional leading '-',
// into an int64.
//
// It is about 3x faster than strconv.ParseInt because it supports only base 10
// (which is enough for JSON) and does not check for range errors: a value
// outside the int64 range silently wraps around instead of being reported.
//
// ok is false when the input is empty, consists solely of a '-' sign, or
// contains any byte other than '0'..'9' after the optional sign. In that case
// v is 0.
func parseInt(bytes []byte) (v int64, ok bool) {
	if len(bytes) == 0 {
		return 0, false
	}

	neg := false
	if bytes[0] == '-' {
		neg = true
		bytes = bytes[1:]
		// A lone "-" carries no digits, so it is not a number. Without this
		// check the loop below would not run and (0, true) would be returned.
		if len(bytes) == 0 {
			return 0, false
		}
	}

	for _, c := range bytes {
		if c < '0' || c > '9' {
			return 0, false
		}
		// Deliberately unchecked: overflow wraps (see the function comment).
		v = 10*v + int64(c-'0')
	}

	if neg {
		return -v, true
	}
	return v, true
}

bytes_safe.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// +build appengine appenginevm
2+
3+
package jsonparser
4+
5+
import (
6+
"strconv"
7+
)
8+
9+
// See bytes_unsafe.go for an explanation of why *[]byte is used (signatures must be consistent with those in that file)
10+
11+
func equalStr(b *[]byte, s string) bool {
12+
return string(*b) == s
13+
}
14+
15+
func parseFloat(b *[]byte) (float64, error) {
16+
return strconv.ParseFloat(string(*b), 64)
17+
}
18+
19+
func bytesToString(b *[]byte) string {
20+
return string(*b)
21+
}

bytes_test.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package jsonparser
2+
3+
import (
4+
"strconv"
5+
"testing"
6+
"unsafe"
7+
)
8+
9+
type ParseIntTest struct {
10+
in string
11+
out int64
12+
isErr bool
13+
}
14+
15+
var parseIntTests = []ParseIntTest{
16+
{
17+
in: "0",
18+
out: 0,
19+
},
20+
{
21+
in: "1",
22+
out: 1,
23+
},
24+
{
25+
in: "-1",
26+
out: -1,
27+
},
28+
{
29+
in: "12345",
30+
out: 12345,
31+
},
32+
{
33+
in: "-12345",
34+
out: -12345,
35+
},
36+
{
37+
in: "9223372036854775807",
38+
out: 9223372036854775807,
39+
},
40+
{
41+
in: "-9223372036854775808",
42+
out: -9223372036854775808,
43+
},
44+
{
45+
in: "18446744073709551616", // = 2^64; integer overflow is not detected
46+
out: 0,
47+
},
48+
49+
{
50+
in: "",
51+
isErr: true,
52+
},
53+
{
54+
in: "abc",
55+
isErr: true,
56+
},
57+
{
58+
in: "12345x",
59+
isErr: true,
60+
},
61+
{
62+
in: "123e5",
63+
isErr: true,
64+
},
65+
{
66+
in: "9223372036854775807x",
67+
isErr: true,
68+
},
69+
}
70+
71+
func TestBytesParseInt(t *testing.T) {
72+
for _, test := range parseIntTests {
73+
out, ok := parseInt([]byte(test.in))
74+
if ok != !test.isErr {
75+
t.Errorf("Test '%s' error return did not match expectation (obtained %t, expected %t)", test.in, !ok, test.isErr)
76+
} else if ok && out != test.out {
77+
t.Errorf("Test '%s' did not return the expected value (obtained %d, expected %d)", test.in, out, test.out)
78+
}
79+
}
80+
}
81+
82+
func BenchmarkParseInt(b *testing.B) {
83+
bytes := []byte("123")
84+
for i := 0; i < b.N; i++ {
85+
parseInt(bytes)
86+
}
87+
}
88+
89+
// Alternative implementation using unsafe and delegating to strconv.ParseInt
90+
func BenchmarkParseIntUnsafeSlower(b *testing.B) {
91+
bytes := []byte("123")
92+
for i := 0; i < b.N; i++ {
93+
strconv.ParseInt(*(*string)(unsafe.Pointer(&bytes)), 10, 64)
94+
}
95+
}

bytes_unsafe.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// +build !appengine,!appenginevm
2+
3+
package jsonparser
4+
5+
import (
6+
"strconv"
7+
"unsafe"
8+
)
9+
10+
//
11+
// The reason for using *[]byte rather than []byte in parameters is an optimization. As of Go 1.6,
12+
// the compiler cannot perfectly inline the function when using a non-pointer slice. That is,
13+
// the non-pointer []byte parameter version is slower than if its function body is manually
14+
// inlined, whereas the pointer []byte version is just as fast as the manually inlined
15+
// version. Instruction count in assembly taken from "go tool compile" confirms this difference.
16+
//
17+
// TODO: Remove hack after Go 1.7 release
18+
//
19+
func equalStr(b *[]byte, s string) bool {
20+
return *(*string)(unsafe.Pointer(b)) == s
21+
}
22+
23+
func parseFloat(b *[]byte) (float64, error) {
24+
return strconv.ParseFloat(*(*string)(unsafe.Pointer(b)), 64)
25+
}
26+
27+
// A hack until issue golang/go#2632 is fixed.
28+
// See: https://github.com/golang/go/issues/2632
29+
func bytesToString(b *[]byte) string {
30+
return *(*string)(unsafe.Pointer(b))
31+
}

bytes_unsafe_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// +build !appengine,!appenginevm
2+
3+
package jsonparser
4+
5+
import (
6+
"reflect"
7+
"strings"
8+
"testing"
9+
"unsafe"
10+
)
11+
12+
var (
13+
// short string/[]byte sequences, as the difference between these
14+
// three methods is a constant overhead
15+
benchmarkString = "0123456789x"
16+
benchmarkBytes = []byte("0123456789y")
17+
)
18+
19+
func bytesEqualStrSafe(abytes []byte, bstr string) bool {
20+
return bstr == string(abytes)
21+
}
22+
23+
func bytesEqualStrUnsafeSlower(abytes *[]byte, bstr string) bool {
24+
aslicehdr := (*reflect.SliceHeader)(unsafe.Pointer(abytes))
25+
astrhdr := reflect.StringHeader{Data: aslicehdr.Data, Len: aslicehdr.Len}
26+
return *(*string)(unsafe.Pointer(&astrhdr)) == bstr
27+
}
28+
29+
func TestEqual(t *testing.T) {
30+
if !equalStr(&[]byte{}, "") {
31+
t.Errorf(`equalStr("", ""): expected true, obtained false`)
32+
return
33+
}
34+
35+
longstr := strings.Repeat("a", 1000)
36+
for i := 0; i < len(longstr); i++ {
37+
s1, s2 := longstr[:i]+"1", longstr[:i]+"2"
38+
b1 := []byte(s1)
39+
40+
if !equalStr(&b1, s1) {
41+
t.Errorf(`equalStr("a"*%d + "1", "a"*%d + "1"): expected true, obtained false`, i, i)
42+
break
43+
}
44+
if equalStr(&b1, s2) {
45+
t.Errorf(`equalStr("a"*%d + "1", "a"*%d + "2"): expected false, obtained true`, i, i)
46+
break
47+
}
48+
}
49+
}
50+
51+
func BenchmarkEqualStr(b *testing.B) {
52+
for i := 0; i < b.N; i++ {
53+
equalStr(&benchmarkBytes, benchmarkString)
54+
}
55+
}
56+
57+
// Alternative implementation without using unsafe
58+
func BenchmarkBytesEqualStrSafe(b *testing.B) {
59+
for i := 0; i < b.N; i++ {
60+
bytesEqualStrSafe(benchmarkBytes, benchmarkString)
61+
}
62+
}
63+
64+
// Alternative implementation using unsafe, but that is slower than the current implementation
65+
func BenchmarkBytesEqualStrUnsafeSlower(b *testing.B) {
66+
for i := 0; i < b.N; i++ {
67+
bytesEqualStrUnsafeSlower(&benchmarkBytes, benchmarkString)
68+
}
69+
}

parser.go

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@ import (
44
"bytes"
55
"errors"
66
"fmt"
7-
"reflect"
87
"strconv"
9-
"unsafe"
108
)
119

1210
// Errors
@@ -127,10 +125,10 @@ func searchKeys(data []byte, keys ...string) int {
127125

128126
// if string is a Key, and key level match
129127
if data[i] == ':' {
130-
key := unsafeBytesToString(data[keyBegin:keyEnd])
128+
key := data[keyBegin:keyEnd]
131129

132130
if keyLevel == level-1 && // If key nesting level match current object nested level
133-
keys[level-1] == key {
131+
equalStr(&key, keys[level-1]) {
134132
keyLevel++
135133
// If we found all keys in path
136134
if keyLevel == lk {
@@ -351,7 +349,7 @@ func GetUnsafeString(data []byte, keys ...string) (val string, err error) {
351349
return "", e
352350
}
353351

354-
return unsafeBytesToString(v), nil
352+
return bytesToString(&v), nil
355353
}
356354

357355
// GetString returns the value retrieved by `Get`, cast to a string if possible, trying to properly handle escape and utf8 symbols
@@ -372,7 +370,7 @@ func GetString(data []byte, keys ...string) (val string, err error) {
372370
return string(v), nil
373371
}
374372

375-
s, err := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`)
373+
s, err := strconv.Unquote(`"` + string(v) + `"`)
376374

377375
return s, err
378376
}
@@ -391,7 +389,7 @@ func GetFloat(data []byte, keys ...string) (val float64, err error) {
391389
return 0, fmt.Errorf("Value is not a number: %s", string(v))
392390
}
393391

394-
val, err = strconv.ParseFloat(unsafeBytesToString(v), 64)
392+
val, err = parseFloat(&v)
395393
return
396394
}
397395

@@ -408,8 +406,11 @@ func GetInt(data []byte, keys ...string) (val int64, err error) {
408406
return 0, fmt.Errorf("Value is not a number: %s", string(v))
409407
}
410408

411-
val, err = strconv.ParseInt(unsafeBytesToString(v), 10, 64)
412-
return
409+
if val, ok := parseInt(v); !ok {
410+
return 0, MalformedValueError
411+
} else {
412+
return val, nil
413+
}
413414
}
414415

415416
// GetBoolean returns the value retrieved by `Get`, cast to a bool if possible.
@@ -434,11 +435,3 @@ func GetBoolean(data []byte, keys ...string) (val bool, err error) {
434435

435436
return
436437
}
437-
438-
// A hack until issue golang/go#2632 is fixed.
439-
// See: https://github.com/golang/go/issues/2632
440-
func unsafeBytesToString(data []byte) string {
441-
h := (*reflect.SliceHeader)(unsafe.Pointer(&data))
442-
sh := reflect.StringHeader{Data: h.Data, Len: h.Len}
443-
return *(*string)(unsafe.Pointer(&sh))
444-
}

0 commit comments

Comments
 (0)