Skip to content

Commit 0da1997

Browse files
authored
Merge branch 'master' into add-set-impl
2 parents e5f9c2e + ee11858 commit 0da1997

File tree

8 files changed

+129
-14
lines changed

8 files changed

+129
-14
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ Benchmarks run on standard Linode 1024 box.
232232
Compared libraries:
233233
* https://golang.org/pkg/encoding/json
234234
* https://github.com/Jeffail/gabs
235+
* https://github.com/a8m/djson
235236
* https://github.com/bitly/go-simplejson
236237
* https://github.com/antonholmquist/jason
237238
* https://github.com/mreiferson/go-ujson
@@ -264,11 +265,12 @@ Library | time/op | bytes/op | allocs/op
264265
------ | ------- | -------- | -------
265266
encoding/json struct | 7879 | 880 | 18
266267
encoding/json interface{} | 8946 | 1521 | 38
267-
Jeffail/gabs | 10053 | 1649 | 46
268+
Jeffail/gabs | 10053 | 1649 | 46
268269
bitly/go-simplejson | 10128 | 2241 | 36
269270
antonholmquist/jason | 27152 | 7237 | 101
270271
github.com/ugorji/go/codec | 8806 | 2176 | 31
271272
mreiferson/go-ujson | **7008** | **1409** | 37
273+
a8m/djson | 3862 | 1249 | 30
272274
pquerna/ffjson | **3769** | **624** | **15**
273275
mailru/easyjson | **2002** | **192** | **9**
274276
buger/jsonparser | **1367** | **0** | **0**
@@ -293,6 +295,7 @@ https://github.com/buger/jsonparser/blob/master/benchmark/benchmark_medium_paylo
293295
| antonholmquist/jason | 94099 | 19013 | 247 |
294296
| github.com/ugorji/go/codec | 114719 | 6712 | 152 |
295297
| mreiferson/go-ujson | **56972** | 11547 | 270 |
298+
| a8m/djson | 28525 | 10196 | 198 |
296299
| pquerna/ffjson | **20298** | **856** | **20** |
297300
| mailru/easyjson | **10512** | **336** | **12** |
298301
| buger/jsonparser | **15955** | **0** | **0** |
@@ -316,6 +319,7 @@ https://github.com/buger/jsonparser/blob/master/benchmark/benchmark_large_payloa
316319
| --- | --- | --- | --- |
317320
| encoding/json struct | 748336 | 8272 | 307 |
318321
| encoding/json interface{} | 1224271 | 215425 | 3395 |
322+
| a8m/djson | 510082 | 213682 | 2845 |
319323
| pquerna/ffjson | **312271** | **7792** | **298** |
320324
| mailru/easyjson | **154186** | **6992** | **288** |
321325
| buger/jsonparser | **85308** | **0** | **0** |

benchmark/benchmark_large_payload_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
// "github.com/Jeffail/gabs"
1212
// "github.com/bitly/go-simplejson"
1313
"encoding/json"
14+
"github.com/a8m/djson"
1415
jlexer "github.com/mailru/easyjson/jlexer"
1516
"github.com/pquerna/ffjson/ffjson"
1617
// "github.com/antonholmquist/jason"
@@ -109,3 +110,22 @@ func BenchmarkEasyJsonLarge(b *testing.B) {
109110
}
110111
}
111112
}
113+
114+
/*
115+
github.com/a8m/djson
116+
*/
117+
func BenchmarkDjsonLarge(b *testing.B) {
118+
for i := 0; i < b.N; i++ {
119+
m, _ := djson.DecodeObject(largeFixture)
120+
users := m["users"].([]interface{})
121+
for _, u := range users {
122+
nothing(u.(map[string]interface{})["username"].(string))
123+
}
124+
125+
topics := m["topics"].(map[string]interface{})["topics"].([]interface{})
126+
for _, t := range topics {
127+
tI := t.(map[string]interface{})
128+
nothing(tI["id"].(float64), tI["slug"].(string))
129+
}
130+
}
131+
}

benchmark/benchmark_medium_payload_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package benchmark
77
import (
88
"encoding/json"
99
"github.com/Jeffail/gabs"
10+
"github.com/a8m/djson"
1011
"github.com/antonholmquist/jason"
1112
"github.com/bitly/go-simplejson"
1213
"github.com/buger/jsonparser"
@@ -282,6 +283,26 @@ func BenchmarkUjsonMedium(b *testing.B) {
282283
}
283284
}
284285

286+
/*
287+
github.com/a8m/djson
288+
*/
289+
func BenchmarkDjsonMedium(b *testing.B) {
290+
for i := 0; i < b.N; i++ {
291+
m, _ := djson.DecodeObject(mediumFixture)
292+
person := m["person"].(map[string]interface{})
293+
name := person["name"].(map[string]interface{})
294+
github := person["github"].(map[string]interface{})
295+
company := m["company"]
296+
gravatar := person["gravatar"].(map[string]interface{})
297+
avatars := gravatar["avatars"].([]interface{})
298+
299+
nothing(name["fullName"].(string), github["followers"].(float64), company)
300+
for _, a := range avatars {
301+
nothing(a.(map[string]interface{})["url"])
302+
}
303+
}
304+
}
305+
285306
/*
286307
github.com/ugorji/go/codec
287308
*/

benchmark/benchmark_small_payload_test.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package benchmark
77
import (
88
"encoding/json"
99
"github.com/Jeffail/gabs"
10+
"github.com/a8m/djson"
1011
"github.com/antonholmquist/jason"
1112
"github.com/bitly/go-simplejson"
1213
"github.com/buger/jsonparser"
@@ -242,7 +243,6 @@ func BenchmarkJasonSmall(b *testing.B) {
242243
/*
243244
github.com/mreiferson/go-ujson
244245
*/
245-
246246
func BenchmarkUjsonSmall(b *testing.B) {
247247
for i := 0; i < b.N; i++ {
248248
json, _ := ujson.NewFromBytes(smallFixture)
@@ -256,6 +256,16 @@ func BenchmarkUjsonSmall(b *testing.B) {
256256
}
257257
}
258258

259+
/*
260+
github.com/a8m/djson
261+
*/
262+
func BenchmarkDjsonSmall(b *testing.B) {
263+
for i := 0; i < b.N; i++ {
264+
m, _ := djson.DecodeObject(smallFixture)
265+
nothing(m["uuid"].(string), m["tz"].(float64), m["ua"].(string), m["st"].(float64))
266+
}
267+
}
268+
259269
/*
260270
github.com/ugorji/go/codec
261271
*/

escape.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ const supplementalPlanesOffset = 0x10000
1111
const highSurrogateOffset = 0xD800
1212
const lowSurrogateOffset = 0xDC00
1313

14+
const basicMultilingualPlaneReservedOffset = 0xDFFF
15+
const basicMultilingualPlaneOffset = 0xFFFF
16+
1417
func combineUTF16Surrogates(high, low rune) rune {
1518
return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset)
1619
}
@@ -49,11 +52,18 @@ func decodeSingleUnicodeEscape(in []byte) (rune, bool) {
4952
return rune(h1<<12 + h2<<8 + h3<<4 + h4), true
5053
}
5154

55+
// isUTF16EncodedRune reports whether r lies in the surrogate range U+D800..U+DFFF,
56+
// which the BMP reserves for the two halves of a UTF-16 surrogate pair.
57+
// Source: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
58+
func isUTF16EncodedRune(r rune) bool {
59+
return highSurrogateOffset <= r && r <= basicMultilingualPlaneReservedOffset
60+
}
61+
5262
func decodeUnicodeEscape(in []byte) (rune, int) {
5363
if r, ok := decodeSingleUnicodeEscape(in); !ok {
5464
// Invalid Unicode escape
5565
return utf8.RuneError, -1
56-
} else if r < highSurrogateOffset {
66+
} else if r <= basicMultilingualPlaneOffset && !isUTF16EncodedRune(r) {
5767
// Valid Unicode escape in Basic Multilingual Plane
5868
return r, 6
5969
} else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain
@@ -66,7 +76,6 @@ func decodeUnicodeEscape(in []byte) (rune, int) {
6676
// Valid UTF16 surrogate pair
6777
return combineUTF16Surrogates(r, r2), 12
6878
}
69-
7079
}
7180

7281
// backslashCharEscapeTable: when '\X' is found for some byte X, it is to be replaced with backslashCharEscapeTable[X]

escape_test.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@ var singleUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
4444
{in: `\uD83D`, out: 0xD83D, len: 6},
4545
{in: `\uDE03`, out: 0xDE03, len: 6},
4646
{in: `\uFFFF`, out: 0xFFFF, len: 6},
47+
{in: `\uFF11`, out: '１', len: 6},
4748
}, commonUnicodeEscapeTests...)
4849

4950
var multiUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
5051
{in: `\uD83D`, isErr: true},
5152
{in: `\uDE03`, isErr: true},
52-
{in: `\uFFFF`, isErr: true},
53+
{in: `\uFFFF`, out: '\uFFFF', len: 6},
54+
{in: `\uFF11`, out: '１', len: 6},
5355

5456
{in: `\uD83D\uDE03`, out: '\U0001F603', len: 12},
5557
{in: `\uD800\uDC00`, out: '\U00010000', len: 12},
@@ -109,13 +111,14 @@ var unescapeTests = []unescapeTest{
109111
{in: `ab\\de`, out: `ab\de`, canAlloc: true},
110112
{in: `ab\"de`, out: `ab"de`, canAlloc: true},
111113
{in: `ab \u00B0 de`, out: `ab ° de`, canAlloc: true},
114+
{in: `ab \uFF11 de`, out: `ab １ de`, canAlloc: true},
115+
{in: `\uFFFF`, out: "\uFFFF", canAlloc: true},
112116
{in: `ab \uD83D\uDE03 de`, out: "ab \U0001F603 de", canAlloc: true},
113117
{in: `\u0000\u0000\u0000\u0000\u0000`, out: "\u0000\u0000\u0000\u0000\u0000", canAlloc: true},
114118
{in: `\u0000 \u0000 \u0000 \u0000 \u0000`, out: "\u0000 \u0000 \u0000 \u0000 \u0000", canAlloc: true},
115119
{in: ` \u0000 \u0000 \u0000 \u0000 \u0000 `, out: " \u0000 \u0000 \u0000 \u0000 \u0000 ", canAlloc: true},
116120

117121
{in: `\uD800`, isErr: true},
118-
{in: `\uFFFF`, isErr: true},
119122
{in: `abcde\`, isErr: true},
120123
{in: `abcde\x`, isErr: true},
121124
{in: `abcde\u`, isErr: true},

parser.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -193,16 +193,21 @@ func searchKeys(data []byte, keys ...string) int {
193193
case '[':
194194
// If we want to get array element by index
195195
if keyLevel == level && keys[level][0] == '[' {
196-
aIdx, _ := strconv.Atoi(keys[level][1 : len(keys[level])-1])
197-
196+
aIdx, err := strconv.Atoi(keys[level][1 : len(keys[level])-1])
197+
if err != nil {
198+
return -1
199+
}
198200
var curIdx int
199201
var valueFound []byte
200202
var valueOffset int
201-
202203
ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
203204
if curIdx == aIdx {
204205
valueFound = value
205206
valueOffset = offset
207+
if dataType == String {
208+
valueOffset = valueOffset - 2
209+
valueFound = data[i + valueOffset:i + valueOffset + len(value) + 2]
210+
}
206211
}
207212
curIdx += 1
208213
})
@@ -308,6 +313,10 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str
308313
}
309314

310315
if maxPath >= level {
316+
if level < 1 {
317+
cb(-1, []byte{}, Unknown, MalformedJsonError)
318+
return -1
319+
}
311320
pathsBuf[level-1] = bytesToString(&keyUnesc)
312321

313322
for pi, p := range paths {
@@ -358,6 +367,12 @@ func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]str
358367
case '[':
359368
var arrIdxFlags int64
360369
var pIdxFlags int64
370+
371+
if level < 0 {
372+
cb(-1, []byte{}, Unknown, MalformedJsonError)
373+
return -1
374+
}
375+
361376
for pi, p := range paths {
362377
if len(p) < level+1 || pathFlags&bitwiseFlags[pi+1] != 0 || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) {
363378
continue
@@ -671,10 +686,6 @@ func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType,
671686
value = value[1 : len(value)-1]
672687
}
673688

674-
if dataType == Null {
675-
value = []byte{}
676-
}
677-
678689
return value, dataType, offset, endOffset, nil
679690
}
680691

@@ -963,7 +974,7 @@ func ParseBoolean(b []byte) (bool, error) {
963974
func ParseString(b []byte) (string, error) {
964975
var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
965976
if bU, err := Unescape(b, stackbuf[:]); err != nil {
966-
return "", nil
977+
return "", MalformedValueError
967978
} else {
968979
return string(bU), nil
969980
}

parser_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,13 @@ var getTests = []GetTest{
566566
isFound: true,
567567
data: `{"b":"2"}`,
568568
},
569+
{
570+
desc: "get string from array",
571+
json: `{"a":[{"b":1},"foo", 3],"c":{"c":[1,2]}}`,
572+
path: []string{"a", "[1]"},
573+
isFound: true,
574+
data: "foo",
575+
},
569576
{
570577
desc: "key in path is index",
571578
json: `{"a":[{"b":"1"},{"b":"2"},3],"c":{"c":[1,2]}}`,
@@ -1413,3 +1420,33 @@ func TestParseFloat(t *testing.T) {
14131420
},
14141421
)
14151422
}
1423+
1424+
var parseStringTest = []ParseTest{
1425+
{
1426+
in: `\uFF11`,
1427+
intype: String,
1428+
out: "\uFF11",
1429+
},
1430+
{
1431+
in: `\uFFFF`,
1432+
intype: String,
1433+
out: "\uFFFF",
1434+
},
1435+
{
1436+
in: `\uDF00`,
1437+
intype: String,
1438+
isErr: true,
1439+
},
1440+
}
1441+
1442+
func TestParseString(t *testing.T) {
1443+
runParseTests(t, "ParseString()", parseStringTest,
1444+
func(test ParseTest) (value interface{}, err error) {
1445+
return ParseString([]byte(test.in))
1446+
},
1447+
func(test ParseTest, obtained interface{}) (bool, interface{}) {
1448+
expected := test.out.(string)
1449+
return obtained.(string) == expected, expected
1450+
},
1451+
)
1452+
}

0 commit comments

Comments
 (0)