diff --git a/Makefile b/Makefile
index 3520136..e843368 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 SOURCE = parser.go
 CONTAINER = jsonparser
 SOURCE_PATH = /go/src/github.com/buger/jsonparser
-BENCHMARK = JsonParserSmall
+BENCHMARK = JsonParser
 BENCHTIME = 5s
 TEST = .
 DRUN = docker run -v `pwd`:$(SOURCE_PATH) -i -t $(CONTAINER)
diff --git a/benchmark/benchmark_medium_payload_test.go b/benchmark/benchmark_medium_payload_test.go
index 8344ed7..b13540a 100644
--- a/benchmark/benchmark_medium_payload_test.go
+++ b/benchmark/benchmark_medium_payload_test.go
@@ -34,6 +34,34 @@ func BenchmarkJsonParserMedium(b *testing.B) {
 	}
 }
 
+// BenchmarkJsonParserEachKeyManualMedium measures a single EachKey pass that
+// looks up four paths in mediumFixture and handles each value by hand.
+func BenchmarkJsonParserEachKeyManualMedium(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		paths := [][]string{
+			[]string{"person", "name", "fullName"},
+			[]string{"person", "github", "followers"},
+			[]string{"company"},
+			[]string{"person", "gravatar", "avatars"},
+		}
+
+		jsonparser.EachKey(mediumFixture, func(idx int, value []byte, vt jsonparser.ValueType, err error) {
+			switch idx {
+			case 0:
+				// jsonparser.ParseString(value)
+			case 1:
+				jsonparser.ParseInt(value)
+			case 2:
+				// jsonparser.ParseString(value)
+			case 3:
+				jsonparser.ArrayEach(value, func(avalue []byte, dataType jsonparser.ValueType, offset int, err error) {
+					jsonparser.Get(avalue, "url")
+				})
+			}
+		}, paths...)
+	}
+}
+
 /*
    encoding/json
 */
diff --git a/benchmark/benchmark_small_payload_test.go b/benchmark/benchmark_small_payload_test.go
index 8fa0463..8abed95 100644
--- a/benchmark/benchmark_small_payload_test.go
+++ b/benchmark/benchmark_small_payload_test.go
@@ -35,6 +35,63 @@ func BenchmarkJsonParserSmall(b *testing.B) {
 	}
 }
 
+// BenchmarkJsonParserEachKeyManualSmall measures one EachKey pass over
+// smallFixture for four single-key paths, calling ParseInt on two of them.
+func BenchmarkJsonParserEachKeyManualSmall(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		paths := [][]string{
+			[]string{"uuid"},
+			[]string{"tz"},
+			[]string{"ua"},
+			[]string{"st"},
+		}
+
+		jsonparser.EachKey(smallFixture, func(idx int, value []byte, vt jsonparser.ValueType, err error) {
+			switch idx {
+			case 0:
+				// jsonparser.ParseString(value)
+			case 1:
+				jsonparser.ParseInt(value)
+			case 2:
+				// jsonparser.ParseString(value)
+			case 3:
+				jsonparser.ParseInt(value)
+			}
+		}, paths...)
+	}
+}
+
+// BenchmarkJsonParserEachKeyStructSmall measures one EachKey pass over
+// smallFixture that parses all four values into a SmallPayload.
+func BenchmarkJsonParserEachKeyStructSmall(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		paths := [][]string{
+			[]string{"uuid"},
+			[]string{"tz"},
+			[]string{"ua"},
+			[]string{"st"},
+		}
+		var data SmallPayload
+
+		jsonparser.EachKey(smallFixture, func(idx int, value []byte, vt jsonparser.ValueType, err error) {
+			switch idx {
+			case 0:
+				data.Uuid, _ = jsonparser.ParseString(value)
+			case 1:
+				v, _ := jsonparser.ParseInt(value)
+				data.Tz = int(v)
+			case 2:
+				data.Ua, _ = jsonparser.ParseString(value)
+			case 3:
+				v, _ := jsonparser.ParseInt(value)
+				data.St = int(v)
+			}
+		}, paths...)
+
+		nothing(data.Uuid, data.Tz, data.Ua, data.St)
+	}
+}
+
 /*
    encoding/json
 */
diff --git a/parser.go b/parser.go
index d3c93f6..8f8313a 100644
--- a/parser.go
+++ b/parser.go
@@ -178,6 +178,173 @@ func searchKeys(data []byte, keys ...string) int {
 	return -1
 }
 
+// EachKey scans data once and calls cb for every path in paths whose value
+// is found. Each path is a list of nested object keys; cb receives the index
+// of the path within paths plus the value, type and error that Get reports
+// for it. Every path is delivered at most once, and keys inside arrays are
+// never searched.
+//
+// It returns the offset in data just past the last delivered value once all
+// paths have been delivered, and -1 if the scan ends, or hits input it cannot
+// parse, before that.
+func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int {
+	const (
+		delivered  = -1 // depth marker: the callback for this path already fired
+		stackPaths = 64 // number of paths tracked without a heap allocation
+	)
+
+	// depth[pi] counts the leading components of paths[pi] that match the
+	// chain of object keys enclosing the current position. Comparing only the
+	// key at the current level is not enough: "a" inside an unrelated object
+	// must not satisfy the path {"nested", "a"}.
+	var depthBuf [stackPaths]int
+	var depth []int
+	if len(paths) <= stackPaths {
+		depth = depthBuf[:len(paths)]
+	} else {
+		depth = make([]int, len(paths))
+	}
+
+	var level, pathsMatched, i int
+	ln := len(data)
+
+	var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
+
+	for i < ln {
+		switch data[i] {
+		case '"':
+			i++
+			keyBegin := i
+
+			strEnd, keyEscaped := stringEnd(data[i:])
+			if strEnd == -1 {
+				return -1
+			}
+			i += strEnd
+
+			keyEnd := i - 1
+
+			valueOffset := nextToken(data[i:])
+			if valueOffset == -1 {
+				return -1
+			}
+
+			i += valueOffset
+
+			// A string that is not followed by ':' is a value, not a key.
+			// Step back so the trailing i++ lands on the token just found.
+			if data[i] != ':' {
+				i--
+				break
+			}
+
+			// A key outside of any object only occurs in malformed input;
+			// stop instead of indexing a path with level-1 < 0.
+			if level < 1 {
+				return -1
+			}
+
+			key := data[keyBegin:keyEnd]
+
+			// for unescape: if there are no escape sequences, this is cheap; if there are, it is a
+			// bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
+			var keyUnesc []byte
+			if !keyEscaped {
+				keyUnesc = key
+			} else if ku, err := Unescape(key, stackbuf[:]); err != nil {
+				return -1
+			} else {
+				keyUnesc = ku
+			}
+
+			valueStart := i + 1 // first byte after ':'
+			valueEnd := -1      // end of the value once one has been delivered
+			descend := false    // some longer path continues below this key
+
+			for pi, p := range paths {
+				d := depth[pi]
+				if d == delivered || d < level-1 {
+					// Already reported, or an enclosing key does not match.
+					continue
+				}
+
+				// A new key at this level discards whatever an earlier
+				// sibling, or its children, matched at this level or below.
+				d = level - 1
+				if len(p) >= level && equalStr(&keyUnesc, p[level-1]) {
+					d = level
+				}
+				depth[pi] = d
+
+				if d != level {
+					continue
+				}
+				if len(p) > level {
+					descend = true
+					continue
+				}
+
+				v, dt, of, e := Get(data[valueStart:])
+				cb(pi, v, dt, e)
+
+				depth[pi] = delivered
+				pathsMatched++
+
+				end := valueStart
+				if of != -1 {
+					end += of
+					valueEnd = end
+				}
+				if pathsMatched == len(paths) {
+					return end
+				}
+			}
+
+			switch {
+			case descend:
+				// Leave i on ':' so the value is scanned for deeper keys.
+			case valueEnd != -1:
+				// Resume right after the delivered value. Stopping one byte
+				// short lets the trailing i++ land on the next token, so a
+				// '}' directly after the value is still counted.
+				i = valueEnd - 1
+			default:
+				// No path is interested in this key: skip an object value as
+				// a whole instead of walking through its keys.
+				tokenOffset := nextToken(data[valueStart:])
+				if tokenOffset == -1 {
+					return -1
+				}
+				if tokenStart := valueStart + tokenOffset; data[tokenStart] == '{' {
+					// NOTE(review): assumes blockEnd returns the block length
+					// including the closing symbol, as the '[' case below does.
+					blockSkip := blockEnd(data[tokenStart:], '{', '}')
+					if blockSkip == -1 {
+						return -1
+					}
+					i = tokenStart + blockSkip - 1
+				}
+			}
+		case '{':
+			level++
+		case '}':
+			level--
+		case '[':
+			// Do not search for keys inside arrays
+			arraySkip := blockEnd(data[i:], '[', ']')
+			if arraySkip == -1 {
+				// Unterminated array: a -1 here would move i backwards.
+				return -1
+			}
+			i += arraySkip - 1
+		}
+
+		i++
+	}
+
+	return -1
+}
+
 // Data types available in valid JSON data.
 type ValueType int
 
diff --git a/parser_test.go b/parser_test.go
index 970c5f2..373f310 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -617,6 +617,68 @@ func TestArrayEach(t *testing.T) {
 	}, "a", "b")
 }
 
+var testJson = []byte(`{"name": "Name", "order": "Order", "sum": 100, "len": 12, "isPaid": true, "nested": {"a":"test", "b":2, "nested3":{"a":"test3","b":4}, "c": "unknown"}, "nested2": {"a":"test2", "b":3}, "arr": [{"a":"zxc", "b": 1}, {"a":"123", "b":2}], "arrInt": [1,2,3,4], "intPtr": 10}`)
+
+// TestEachKey looks up one top-level key and two nested keys in a single pass
+// and checks that exactly those three callbacks fire with the right values.
+func TestEachKey(t *testing.T) {
+	paths := [][]string{
+		[]string{"name"},
+		[]string{"nested", "a"},
+		[]string{"nested", "nested3", "b"},
+	}
+
+	keysFound := 0
+
+	EachKey(testJson, func(idx int, value []byte, vt ValueType, err error) {
+		keysFound++
+
+		switch idx {
+		case 0:
+			if string(value) != "Name" {
+				t.Errorf("Should find 1 key")
+			}
+		case 1:
+			if string(value) != "test" {
+				t.Errorf("Should find 2 key")
+			}
+		case 2:
+			if string(value) != "4" {
+				t.Errorf("Should find 3 key")
+			}
+		default:
+			t.Errorf("Should found only 3 keys")
+		}
+	}, paths...)
+
+	if keysFound != 3 {
+		t.Errorf("Should find 3 keys: %d", keysFound)
+	}
+}
+
+// TestEachKeyAncestors checks that a key only matches a path when every
+// enclosing key matches as well, and that scanning continues correctly when a
+// delivered value is directly followed by a closing brace.
+func TestEachKeyAncestors(t *testing.T) {
+	data := []byte(`{"other":{"a":"wrong"},"nested":{"a":"right"},"a":"top"}`)
+	paths := [][]string{
+		[]string{"nested", "a"},
+		[]string{"a"},
+	}
+
+	found := map[int]string{}
+	EachKey(data, func(idx int, value []byte, vt ValueType, err error) {
+		found[idx] = string(value)
+	}, paths...)
+
+	if found[0] != "right" {
+		t.Errorf("nested.a: expected %q, got %q", "right", found[0])
+	}
+	if found[1] != "top" {
+		t.Errorf("a: expected %q, got %q", "top", found[1])
+	}
+}
+
 type ParseTest struct {
 	in     string
 	intype ValueType