From b88eaff40e197a6d29ef11b57f3e1b79f95ea755 Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Thu, 7 Apr 2016 12:05:07 -0400 Subject: [PATCH 1/9] Initial work on handling escaped strings in JSON --- parser.go | 29 +++++---- parser_test.go | 11 +++- parserescapes.go | 146 ++++++++++++++++++++++++++++++++++++++++++ parserescapes_test.go | 51 +++++++++++++++ 4 files changed, 223 insertions(+), 14 deletions(-) create mode 100644 parserescapes.go create mode 100644 parserescapes_test.go diff --git a/parser.go b/parser.go index 9bb51ed..2511d8f 100644 --- a/parser.go +++ b/parser.go @@ -11,15 +11,20 @@ import ( // Errors var ( - KeyPathNotFoundError = errors.New("Key path not found") - UnknownValueTypeError = errors.New("Unknown value type") - MalformedJsonError = errors.New("Malformed JSON error") - MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol") - MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol") - MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol") - MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol") + KeyPathNotFoundError = errors.New("Key path not found") + UnknownValueTypeError = errors.New("Unknown value type") + MalformedJsonError = errors.New("Malformed JSON error") + MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol") + MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol") + MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol") + MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol") + MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string") ) +// How much stack space to allocate for unescaping JSON strings; if a string longer +// than this needs to be escaped, it will 
result in a heap allocation +const unescapeStackBufSize = 64 + func tokenEnd(data []byte) int { for i, c := range data { switch c { @@ -31,7 +36,6 @@ func tokenEnd(data []byte) int { return -1 } - // Find position of next character which is not ' ', ',', '}' or ']' func nextToken(data []byte, skipComma bool) int { for i, c := range data { @@ -133,10 +137,10 @@ func searchKeys(data []byte, keys ...string) int { i += valueOffset // if string is a Key, and key level match - if data[i] == ':'{ + if data[i] == ':' { key := unsafeBytesToString(data[keyBegin:keyEnd]) - if keyLevel == level-1 && // If key nesting level match current object nested level + if keyLevel == level-1 && // If key nesting level match current object nested level keys[level-1] == key { keyLevel++ // If we found all keys in path @@ -392,9 +396,10 @@ func GetString(data []byte, keys ...string) (val string, err error) { return string(v), nil } - s, err := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`) + var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings + out, err := unescape(v, stackbuf[:]) - return s, err + return string(out), err } // GetFloat returns the value retrieved by `Get`, cast to a float64 if possible. 
diff --git a/parser_test.go b/parser_test.go index ba3f99d..bdbd381 100644 --- a/parser_test.go +++ b/parser_test.go @@ -326,19 +326,26 @@ var getFloatTests = []Test{ var getStringTests = []Test{ Test{ - desc: `Translate unicode symbols`, + desc: `Translate Unicode symbols`, json: `{"c": "test"}`, path: []string{"c"}, isFound: true, data: `test`, }, Test{ - desc: `Translate unicode symbols`, + desc: `Translate Unicode symbols`, json: `{"c": "15\u00b0C"}`, path: []string{"c"}, isFound: true, data: `15°C`, }, + Test{ + desc: `Translate supplementary Unicode symbols`, + json: `{"c": "\uD83D\uDE03"}`, // Smiley face (UTF16 surrogate pair) + path: []string{"c"}, + isFound: true, + data: "\U0001F603", // Smiley face + }, Test{ desc: `Translate escape symbols`, json: `{"c": "\\\""}`, diff --git a/parserescapes.go b/parserescapes.go new file mode 100644 index 0000000..5fdcfca --- /dev/null +++ b/parserescapes.go @@ -0,0 +1,146 @@ +package jsonparser + +import ( + "bytes" + "unicode/utf8" +) + +// JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7 + +const highSurrogateOffset = 0xDB00 +const lowSurrogateOffset = 0xDC00 + +func combineUTF16Surrogates(high, low rune) rune { + return (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset) +} + +const badHex = -1 + +func h2I(c byte) int { + switch { + case c >= '0' && c <= '9': + return int(c - '0') + case c >= 'A' && c <= 'F': + return int(c - 'A' + 10) + case c >= 'a' && c <= 'f': + return int(c - 'a' + 10) + } + return badHex +} + +// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. 
In JSON, these can either come alone or as part +// of "UTF16 surrogate pairs" that must be handled together; this function only handles one at a time +func decodeSingleUnicodeEscape(in []byte) (rune, bool) { + // We need at least 6 characters total + if len(in) < 6 { + return utf8.RuneError, false + } + + // Convert hex to decimal + h1, h2, h3, h4 := h2I(in[2]), h2I(in[3]), h2I(in[4]), h2I(in[5]) + if h1 == badHex || h2 == badHex || h3 == badHex || h4 == badHex { + return utf8.RuneError, false + } + + // Compose the hex digits + return rune(h1<<12 + h2<<8 + h3<<4 + h4), true +} + +func decodeUnicodeEscape(in []byte) (rune, int) { + if r, ok := decodeSingleUnicodeEscape(in); !ok { + // Invalid Unicode escape + return utf8.RuneError, -1 + } else if r < highSurrogateOffset { + // Valid Unicode escape in Basic Multilingual Plane + return r, 6 + } else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain + // UTF16 "high surrogate" without manditory valid following Unicode escape for the "low surrogate" + return utf8.RuneError, -1 + } else { + // Valid UTF16 surrogate pair + return combineUTF16Surrogates(r, r2), 12 + } + +} + +// unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns +// how many characters were consumed from 'in' and emitted into 'out'. +// If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error. 
+func unescapeToUTF8(in, out []byte) (inLen int, outLen int) { + if len(in) < 2 || in[0] != '\\' { + // Invalid escape due to insufficient characters for any escape or no initial backslash + return -1, -1 + } + + // https://tools.ietf.org/html/rfc7159#section-7 + switch e := in[1]; e { + case '"', '\\', '/': + // Escapes that stand for the escaped character itself + out[0] = e + return 2, 1 + case 'b': + // The remaining 2-character escapes denote control characters, not the escape letter + out[0] = '\b' + return 2, 1 + case 'f': + out[0] = '\f' + return 2, 1 + case 'n': + out[0] = '\n' + return 2, 1 + case 'r': + out[0] = '\r' + return 2, 1 + case 't': + out[0] = '\t' + return 2, 1 + case 'u': + // Unicode escape + if r, inLen := decodeUnicodeEscape(in); inLen == -1 { + // Invalid Unicode escape + return -1, -1 + } else { + // Valid Unicode escape; re-encode as UTF8 + outLen := utf8.EncodeRune(out, r) + return inLen, outLen + } + } + + return -1, -1 +} + +// unescape unescapes the string contained in 'in' and returns it as a slice. +// If 'in' contains no escaped characters: +// Returns 'in'. +// Else, if 'out' is of sufficient capacity (guaranteed if cap(out) >= len(in)): +// 'out' is used to build the unescaped string and is returned with no extra allocation +// Else: +// A new slice is allocated and returned. 
+func unescape(in, out []byte) ([]byte, error) { + firstBackslash := bytes.IndexByte(in, '\\') + if firstBackslash == -1 { + return in, nil + } + + // Get a buffer of sufficient size (allocate if needed) + if cap(out) < len(in) { + out = make([]byte, len(in)) + } else { + out = out[0:len(in)] + } + + // Copy the first sequence of unescaped bytes to the output and obtain a buffer pointer (subslice) + copy(out, in[:firstBackslash]) + in = in[firstBackslash:] + buf := out[firstBackslash:] + + for len(in) > 0 { + // Unescape the next escaped character + inLen, bufLen := unescapeToUTF8(in, buf) + if inLen == -1 { + return nil, MalformedStringEscapeError + } + + in = in[inLen:] + buf = buf[bufLen:] + + // Copy everything up until the next backslash + nextBackslash := bytes.IndexByte(in, '\\') + if nextBackslash == -1 { + copy(buf, in) + buf = buf[len(in):] + break + } else { + copy(buf, in[:nextBackslash]) + buf = buf[nextBackslash:] + in = in[nextBackslash:] + } + } + + // Trim the out buffer to the amount that was actually emitted + return out[:len(out)-len(buf)], nil +} diff --git a/parserescapes_test.go b/parserescapes_test.go new file mode 100644 index 0000000..744c081 --- /dev/null +++ b/parserescapes_test.go @@ -0,0 +1,51 @@ +package jsonparser + +import ( + "testing" +) + +func TestH2I(t *testing.T) { + hexChars := []byte{'0', '9', 'A', 'F', 'a', 'f', 'x', '\000'} + hexValues := []int{0, 9, 10, 15, 10, 15, -1, -1} + + for i, c := range hexChars { + if v := h2I(c); v != hexValues[i] { + t.Errorf("h2I('%c') returned wrong value (obtained %d, expected %d)", c, v, hexValues[i]) + } + } +} + +func TestDecodeSingleUnicodeEscape(t *testing.T) { + escapeSequences := []string{ + `\"`, + `\\`, + `\n`, + `\t`, + `\r`, + `\/`, + `\b`, + `\f`, + } + + runeValues := []struct { + r rune + ok bool + }{ + {'"', true}, + {'\\', true}, + {'\n', true}, + {'\t', true}, + {'/', true}, + {'\b', true}, + {'\f', true}, + } + + for i, esc := range escapeSequences { + expected := 
runeValues[i] + if r, ok := decodeSingleUnicodeEscape([]byte(esc)); ok != expected.ok { + t.Errorf("decodeSingleUnicodeEscape(%s) returned 'ok' mismatch: expected %t, obtained %t", esc, expected.ok, ok) + } else if r != expected.r { + t.Errorf("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", esc, expected.r, expected.r, r, r) + } + } +} From 244adfedba822c7500844669f5d67e5a622d0b80 Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Thu, 7 Apr 2016 13:51:49 -0400 Subject: [PATCH 2/9] Fixed bugs and completed test cases --- parserescapes.go | 16 +++- parserescapes_test.go | 216 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 202 insertions(+), 30 deletions(-) diff --git a/parserescapes.go b/parserescapes.go index 5fdcfca..370f67f 100644 --- a/parserescapes.go +++ b/parserescapes.go @@ -2,16 +2,18 @@ package jsonparser import ( "bytes" + "fmt" "unicode/utf8" ) // JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7 -const highSurrogateOffset = 0xDB00 +const supplementalPlanesOffset = 0x10000 +const highSurrogateOffset = 0xD800 const lowSurrogateOffset = 0xDC00 func combineUTF16Surrogates(high, low rune) rune { - return (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset) + return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset) } const badHex = -1 @@ -28,8 +30,12 @@ func h2I(c byte) int { return badHex } -// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. In JSON, these can either come alone or as part -// of "UTF16 surrogate pairs" that must be handled together; this function only handles one at a time +// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. The prefix \u is assumed to be present and +// is not checked. +// In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together. +// This function only handles one; decodeUnicodeEscape handles this more complex case. 
+var _ = fmt.Println + func decodeSingleUnicodeEscape(in []byte) (rune, bool) { // We need at least 6 characters total if len(in) < 6 { @@ -125,6 +131,8 @@ func unescape(in, out []byte) ([]byte, error) { return nil, MalformedStringEscapeError } + //fmt.Printf("Decoded rune from UTF: inLen: %d, outLen: %d, rune UTF8: %x\n", inLen, bufLen, buf[:bufLen]) + in = in[inLen:] buf = buf[bufLen:] diff --git a/parserescapes_test.go b/parserescapes_test.go index 744c081..b212886 100644 --- a/parserescapes_test.go +++ b/parserescapes_test.go @@ -1,6 +1,8 @@ package jsonparser import ( + "bytes" + "fmt" "testing" ) @@ -15,37 +17,199 @@ func TestH2I(t *testing.T) { } } +type escapedUnicodeRuneTest struct { + in string + isErr bool + out rune + len int +} + +var commonUnicodeEscapeTests = []escapedUnicodeRuneTest{ + {in: `\u0041`, out: 'A', len: 6}, + {in: `\u0000`, out: 0, len: 6}, + {in: `\u00b0`, out: '°', len: 6}, + {in: `\u00B0`, out: '°', len: 6}, + + {in: `\x1234`, out: 0x1234, len: 6}, // These functions do not check the \u prefix + + {in: ``, isErr: true}, + {in: `\`, isErr: true}, + {in: `\u`, isErr: true}, + {in: `\u1`, isErr: true}, + {in: `\u11`, isErr: true}, + {in: `\u111`, isErr: true}, + {in: `\u123X`, isErr: true}, +} + +var singleUnicodeEscapeTests = append([]escapedUnicodeRuneTest{ + {in: `\uD83D`, out: 0xD83D, len: 6}, + {in: `\uDE03`, out: 0xDE03, len: 6}, + {in: `\uFFFF`, out: 0xFFFF, len: 6}, +}, commonUnicodeEscapeTests...) + +var multiUnicodeEscapeTests = append([]escapedUnicodeRuneTest{ + {in: `\uD83D`, isErr: true}, + {in: `\uDE03`, isErr: true}, + {in: `\uFFFF`, isErr: true}, + + {in: `\uD83D\uDE03`, out: '\U0001F603', len: 12}, + {in: `\uD800\uDC00`, out: '\U00010000', len: 12}, + + {in: `\uD800\`, isErr: true}, + {in: `\uD800\u`, isErr: true}, + {in: `\uD800\uD`, isErr: true}, + {in: `\uD800\uDC`, isErr: true}, + {in: `\uD800\uDC0`, isErr: true}, +}, commonUnicodeEscapeTests...) 
+ func TestDecodeSingleUnicodeEscape(t *testing.T) { - escapeSequences := []string{ - `\"`, - `\\`, - `\n`, - `\t`, - `\r`, - `\/`, - `\b`, - `\f`, + for _, test := range singleUnicodeEscapeTests { + r, ok := decodeSingleUnicodeEscape([]byte(test.in)) + isErr := !ok + + if isErr != test.isErr { + t.Errorf("decodeSingleUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t", test.in, test.isErr, isErr) + } else if isErr { + continue + } else if r != test.out { + t.Errorf("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", test.in, test.out, test.out, r, r) + } } +} + +func TestDecodeUnicodeEscape(t *testing.T) { + for _, test := range multiUnicodeEscapeTests { + r, len := decodeUnicodeEscape([]byte(test.in)) + isErr := (len == -1) - runeValues := []struct { - r rune - ok bool - }{ - {'"', true}, - {'\\', true}, - {'\n', true}, - {'\t', true}, - {'/', true}, - {'\b', true}, - {'\f', true}, + if isErr != test.isErr { + t.Errorf("decodeUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t", test.in, test.isErr, isErr) + } else if isErr { + continue + } else if len != test.len { + t.Errorf("decodeUnicodeEscape(%s) returned length mismatch: expected %d, obtained %d", test.in, test.len, len) + } else if r != test.out { + t.Errorf("decodeUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", test.in, test.out, test.out, r, r) + } } +} + +type unescapeTest struct { + in string + out string + canAlloc bool + isErr bool +} - for i, esc := range escapeSequences { - expected := runeValues[i] - if r, ok := decodeSingleUnicodeEscape([]byte(esc)); ok != expected.ok { - t.Errorf("decodeSingleUnicodeEscape(%s) returned 'ok' mismatch: expected %t, obtained %t", esc, expected.ok, ok) - } else if r != expected.r { - t.Errorf("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", esc, expected.r, expected.r, r, r) +var unescapeTests = []unescapeTest{ + {in: ``, 
out: ``, canAlloc: false}, + {in: `a`, out: `a`, canAlloc: false}, + {in: `abcde`, out: `abcde`, canAlloc: false}, + + {in: `ab\\de`, out: `ab\de`, canAlloc: true}, + {in: `ab\"de`, out: `ab"de`, canAlloc: true}, + {in: `ab \u00B0 de`, out: `ab ° de`, canAlloc: true}, + {in: `ab \uD83D\uDE03 de`, out: "ab \U0001F603 de", canAlloc: true}, + {in: `\u0000\u0000\u0000\u0000\u0000`, out: "\u0000\u0000\u0000\u0000\u0000", canAlloc: true}, + {in: `\u0000 \u0000 \u0000 \u0000 \u0000`, out: "\u0000 \u0000 \u0000 \u0000 \u0000", canAlloc: true}, + {in: ` \u0000 \u0000 \u0000 \u0000 \u0000 `, out: " \u0000 \u0000 \u0000 \u0000 \u0000 ", canAlloc: true}, + + {in: `\uD800`, isErr: true}, + {in: `\uFFFF`, isErr: true}, + {in: `abcde\`, isErr: true}, + {in: `abcde\x`, isErr: true}, + {in: `abcde\u`, isErr: true}, + {in: `abcde\u1`, isErr: true}, + {in: `abcde\u12`, isErr: true}, + {in: `abcde\u123`, isErr: true}, + {in: `abcde\uD800`, isErr: true}, + {in: `ab\uD800de`, isErr: true}, + {in: `\uD800abcde`, isErr: true}, +} + +// isSameMemory checks if two slices contain the same memory pointer (meaning one is a +// subslice of the other, with possibly differing lengths/capacities). 
+func isSameMemory(a, b []byte) bool { + if cap(a) == 0 || cap(b) == 0 { + return cap(a) == cap(b) + } else if a, b = a[:1], b[:1]; a[0] != b[0] { + return false + } else { + a[0]++ + same := (a[0] == b[0]) + a[0]-- + return same + } + +} + +func TestUnescape(t *testing.T) { + + for _, test := range unescapeTests { + type bufferTestCase struct { + buf []byte + isTooSmall bool + } + + var bufs []bufferTestCase + + if len(test.in) == 0 { + // If the input string is length 0, only a buffer of size 0 is a meaningful test + bufs = []bufferTestCase{{nil, false}} + } else { + // For non-empty input strings, we can try several buffer sizes (0, len-1, len) + bufs = []bufferTestCase{ + {nil, true}, + {make([]byte, 0, len(test.in)-1), true}, + {make([]byte, 0, len(test.in)), false}, + } + } + + for _, buftest := range bufs { + in := []byte(test.in) + buf := buftest.buf + + out, err := unescape(in, buf) + isErr := (err != nil) + isAlloc := !isSameMemory(out, in) && !isSameMemory(out, buf) + + if isErr != test.isErr { + t.Errorf("unescape(`%s`, bufsize=%d) returned isErr mismatch: expected %t, obtained %t", test.in, cap(buf), test.isErr, isErr) + break + } else if isErr { + continue + } else if !bytes.Equal(out, []byte(test.out)) { + t.Errorf("unescape(`%s`, bufsize=%d) returned unescaped mismatch: expected `%s` (%v, len %d), obtained `%s` (%v, len %d)", test.in, cap(buf), test.out, []byte(test.out), len(test.out), string(out), out, len(out)) + break + } else if isAlloc != (test.canAlloc && buftest.isTooSmall) { + t.Errorf("unescape(`%s`, bufsize=%d) returned isAlloc mismatch: expected %t, obtained %t", test.in, cap(buf), buftest.isTooSmall, isAlloc) + break + } } } } + +// +//escapeSequences := []string{ +//`\"`, +//`\\`, +//`\n`, +//`\t`, +//`\r`, +//`\/`, +//`\b`, +//`\f`, +//} +// +//runeValues := []struct { +//r rune +//ok bool +//}{ +//{'"', true}, +//{'\\', true}, +//{'\n', true}, +//{'\t', true}, +//{'/', true}, +//{'\b', true}, +//{'\f', true}, +//} From 
14bfe185544d9a70373923cedaa2bcf1cf22b290 Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Thu, 7 Apr 2016 13:54:21 -0400 Subject: [PATCH 3/9] Cleaned up code cruft from experimentation --- parserescapes.go | 5 ----- parserescapes_test.go | 27 --------------------------- 2 files changed, 32 deletions(-) diff --git a/parserescapes.go b/parserescapes.go index 370f67f..b99a497 100644 --- a/parserescapes.go +++ b/parserescapes.go @@ -2,7 +2,6 @@ package jsonparser import ( "bytes" - "fmt" "unicode/utf8" ) @@ -34,8 +33,6 @@ func h2I(c byte) int { // is not checked. // In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together. // This function only handles one; decodeUnicodeEscape handles this more complex case. -var _ = fmt.Println - func decodeSingleUnicodeEscape(in []byte) (rune, bool) { // We need at least 6 characters total if len(in) < 6 { @@ -131,8 +128,6 @@ func unescape(in, out []byte) ([]byte, error) { return nil, MalformedStringEscapeError } - //fmt.Printf("Decoded rune from UTF: inLen: %d, outLen: %d, rune UTF8: %x\n", inLen, bufLen, buf[:bufLen]) - in = in[inLen:] buf = buf[bufLen:] diff --git a/parserescapes_test.go b/parserescapes_test.go index b212886..9490f17 100644 --- a/parserescapes_test.go +++ b/parserescapes_test.go @@ -2,7 +2,6 @@ package jsonparser import ( "bytes" - "fmt" "testing" ) @@ -144,7 +143,6 @@ func isSameMemory(a, b []byte) bool { } func TestUnescape(t *testing.T) { - for _, test := range unescapeTests { type bufferTestCase struct { buf []byte @@ -188,28 +186,3 @@ func TestUnescape(t *testing.T) { } } } - -// -//escapeSequences := []string{ -//`\"`, -//`\\`, -//`\n`, -//`\t`, -//`\r`, -//`\/`, -//`\b`, -//`\f`, -//} -// -//runeValues := []struct { -//r rune -//ok bool -//}{ -//{'"', true}, -//{'\\', true}, -//{'\n', true}, -//{'\t', true}, -//{'/', true}, -//{'\b', true}, -//{'\f', true}, -//} From 2f2fff527d8fb105b4fb736ff5379cdb48959241 Mon Sep 17 00:00:00 2001 From: Drew Boyuka 
Date: Thu, 7 Apr 2016 14:08:07 -0400 Subject: [PATCH 4/9] searchKeys now accepts escaped JSON keys The keys parameter passed in to searchKeys are interpreted literally, however, and are not unescaped. If search key may contain escape sequences that should be unescaped, the calling code must handle this. --- parser.go | 26 ++++++++++++++++++-------- parser_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/parser.go b/parser.go index 2511d8f..6083a47 100644 --- a/parser.go +++ b/parser.go @@ -116,6 +116,8 @@ func searchKeys(data []byte, keys ...string) int { ln := len(data) lk := len(keys) + var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings + for i < ln { switch data[i] { case '"': @@ -138,14 +140,22 @@ func searchKeys(data []byte, keys ...string) int { // if string is a Key, and key level match if data[i] == ':' { - key := unsafeBytesToString(data[keyBegin:keyEnd]) - - if keyLevel == level-1 && // If key nesting level match current object nested level - keys[level-1] == key { - keyLevel++ - // If we found all keys in path - if keyLevel == lk { - return i + 1 + key := data[keyBegin:keyEnd] + + // for unescape: if there are no escape sequences, this is cheap; if there are, it is a + // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize + if keyUnesc, err := unescape(key, stackbuf[:]); err != nil { + return -1 + } else { + keyUnescStr := unsafeBytesToString(keyUnesc) + + if keyLevel == level-1 && // If key nesting level match current object nested level + keys[level-1] == keyUnescStr { + keyLevel++ + // If we found all keys in path + if keyLevel == lk { + return i + 1 + } } } } else { diff --git a/parser_test.go b/parser_test.go index bdbd381..52db538 100644 --- a/parser_test.go +++ b/parser_test.go @@ -158,6 +158,30 @@ var getTests = []Test{ isFound: true, data: `3`, }, + + // Escaped key tests + Test{ + desc: `key with 
simple escape`, + json: `{"a\\b":1}`, + path: []string{"a\\b"}, + isFound: true, + data: `1`, + }, + Test{ + desc: `key with Unicode escape`, + json: `{"a\u00B0b":1}`, + path: []string{"a\u00B0b"}, + isFound: true, + data: `1`, + }, + Test{ + desc: `key with complex escape`, + json: `{"a\uD83D\uDE03b":1}`, + path: []string{"a\U0001F603b"}, + isFound: true, + data: `1`, + }, + Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance desc: `malformed with trailing whitespace`, json: `{"a":1 `, @@ -268,6 +292,7 @@ var getTests = []Test{ path: []string{"a"}, isErr: true, }, + Test{ // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance desc: "malformed key (followed by comma followed by colon)", json: `{"a",:1}`, From c064b3b7637c73d14fc8f245dfbedbe46fbc1cb7 Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Thu, 7 Apr 2016 14:16:12 -0400 Subject: [PATCH 5/9] Fixed a corner case (bad UTF16 low surrogate) and more cleanup --- parserescapes.go | 3 +++ parserescapes_test.go | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/parserescapes.go b/parserescapes.go index b99a497..5ba1531 100644 --- a/parserescapes.go +++ b/parserescapes.go @@ -59,6 +59,9 @@ func decodeUnicodeEscape(in []byte) (rune, int) { } else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain // UTF16 "high surrogate" without manditory valid following Unicode escape for the "low surrogate" return utf8.RuneError, -1 + } else if r2 < lowSurrogateOffset { + // Invalid UTF16 "low surrogate" + return utf8.RuneError, -1 } else { // Valid UTF16 surrogate pair return combineUTF16Surrogates(r, r2), 12 diff --git a/parserescapes_test.go b/parserescapes_test.go index 9490f17..8ba982d 100644 --- a/parserescapes_test.go +++ b/parserescapes_test.go @@ -59,6 +59,7 @@ var 
multiUnicodeEscapeTests = append([]escapedUnicodeRuneTest{ {in: `\uD800\uD`, isErr: true}, {in: `\uD800\uDC`, isErr: true}, {in: `\uD800\uDC0`, isErr: true}, + {in: `\uD800\uDBFF`, isErr: true}, // invalid low surrogate }, commonUnicodeEscapeTests...) func TestDecodeSingleUnicodeEscape(t *testing.T) { @@ -94,10 +95,10 @@ func TestDecodeUnicodeEscape(t *testing.T) { } type unescapeTest struct { - in string - out string - canAlloc bool - isErr bool + in string // escaped string + out string // expected unescaped string + canAlloc bool // can unescape cause an allocation (depending on buffer size)? true iff 'in' contains escape sequence(s) + isErr bool // should this operation result in an error } var unescapeTests = []unescapeTest{ From f184509cf6f69b6caf7b02a61c8f2b43b82ecdb0 Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Thu, 7 Apr 2016 14:16:57 -0400 Subject: [PATCH 6/9] Shortened filenames --- parserescapes.go => escape.go | 0 parserescapes_test.go => escape_test.go | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename parserescapes.go => escape.go (100%) rename parserescapes_test.go => escape_test.go (100%) diff --git a/parserescapes.go b/escape.go similarity index 100% rename from parserescapes.go rename to escape.go diff --git a/parserescapes_test.go b/escape_test.go similarity index 100% rename from parserescapes_test.go rename to escape_test.go From 6cf33ba830a1021f90c1201b8601578c7b222b7f Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Thu, 7 Apr 2016 14:21:14 -0400 Subject: [PATCH 7/9] Exported Unescape for users who want to unescape their search keys --- escape.go | 2 +- escape_test.go | 2 +- parser.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/escape.go b/escape.go index 5ba1531..ec1c4cd 100644 --- a/escape.go +++ b/escape.go @@ -106,7 +106,7 @@ func unescapeToUTF8(in, out []byte) (inLen int, outLen int) { // 'out' is used to build the unescaped string and is returned with no extra allocation // Else: // A new slice 
is allocated and returned. -func unescape(in, out []byte) ([]byte, error) { +func Unescape(in, out []byte) ([]byte, error) { firstBackslash := bytes.IndexByte(in, '\\') if firstBackslash == -1 { return in, nil diff --git a/escape_test.go b/escape_test.go index 8ba982d..8a5b6b5 100644 --- a/escape_test.go +++ b/escape_test.go @@ -168,7 +168,7 @@ func TestUnescape(t *testing.T) { in := []byte(test.in) buf := buftest.buf - out, err := unescape(in, buf) + out, err := Unescape(in, buf) isErr := (err != nil) isAlloc := !isSameMemory(out, in) && !isSameMemory(out, buf) diff --git a/parser.go b/parser.go index 6083a47..9a15b42 100644 --- a/parser.go +++ b/parser.go @@ -144,7 +144,7 @@ func searchKeys(data []byte, keys ...string) int { // for unescape: if there are no escape sequences, this is cheap; if there are, it is a // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize - if keyUnesc, err := unescape(key, stackbuf[:]); err != nil { + if keyUnesc, err := Unescape(key, stackbuf[:]); err != nil { return -1 } else { keyUnescStr := unsafeBytesToString(keyUnesc) @@ -407,7 +407,7 @@ func GetString(data []byte, keys ...string) (val string, err error) { } var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings - out, err := unescape(v, stackbuf[:]) + out, err := Unescape(v, stackbuf[:]) return string(out), err } From 56ced5da14ac7c4fde018d1c22206e02a8b18565 Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Thu, 7 Apr 2016 14:21:42 -0400 Subject: [PATCH 8/9] fixup! 
Exported Unescape for users who want to unescape their search keys --- escape_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/escape_test.go b/escape_test.go index 8a5b6b5..db18108 100644 --- a/escape_test.go +++ b/escape_test.go @@ -173,15 +173,15 @@ func TestUnescape(t *testing.T) { isAlloc := !isSameMemory(out, in) && !isSameMemory(out, buf) if isErr != test.isErr { - t.Errorf("unescape(`%s`, bufsize=%d) returned isErr mismatch: expected %t, obtained %t", test.in, cap(buf), test.isErr, isErr) + t.Errorf("Unescape(`%s`, bufsize=%d) returned isErr mismatch: expected %t, obtained %t", test.in, cap(buf), test.isErr, isErr) break } else if isErr { continue } else if !bytes.Equal(out, []byte(test.out)) { - t.Errorf("unescape(`%s`, bufsize=%d) returned unescaped mismatch: expected `%s` (%v, len %d), obtained `%s` (%v, len %d)", test.in, cap(buf), test.out, []byte(test.out), len(test.out), string(out), out, len(out)) + t.Errorf("Unescape(`%s`, bufsize=%d) returned unescaped mismatch: expected `%s` (%v, len %d), obtained `%s` (%v, len %d)", test.in, cap(buf), test.out, []byte(test.out), len(test.out), string(out), out, len(out)) break } else if isAlloc != (test.canAlloc && buftest.isTooSmall) { - t.Errorf("unescape(`%s`, bufsize=%d) returned isAlloc mismatch: expected %t, obtained %t", test.in, cap(buf), buftest.isTooSmall, isAlloc) + t.Errorf("Unescape(`%s`, bufsize=%d) returned isAlloc mismatch: expected %t, obtained %t", test.in, cap(buf), buftest.isTooSmall, isAlloc) break } } From c785a7f1c5745b922d7d6013c016bbd456ba257d Mon Sep 17 00:00:00 2001 From: Drew Boyuka Date: Wed, 6 Apr 2016 13:44:06 -0400 Subject: [PATCH 9/9] Added ParsePrimitiveValue convenience function for users --- parser.go | 29 +++++++ parser_test.go | 232 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 182 insertions(+), 79 deletions(-) diff --git a/parser.go b/parser.go index 9a15b42..a96d32f 100644 --- a/parser.go +++ b/parser.go @@ -477,3 
+477,32 @@ func unsafeBytesToString(data []byte) string { sh := reflect.StringHeader{Data: h.Data, Len: h.Len} return *(*string)(unsafe.Pointer(&sh)) } + +// ParseBoolean parses a Boolean ValueType into a Go bool (not particularly useful, but here for completeness) +func ParseBoolean(vbytes []byte) bool { + return (vbytes[0] == 't') // assumes value is already validated by Get(), etc. as signaled by jtype == Boolean +} + +// ParseStringAsBytes parses a String ValueType into a Go []byte (the main parsing work is unescaping the JSON string) +func ParseStringAsBytes(vbytes []byte) ([]byte, error) { + var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings (hopefully; the Go compiler might just always kick stackbuf[:] into the heap) + return Unescape(vbytes, stackbuf[:]) +} + +// ParseString parses a String ValueType into a Go string (the main parsing work is unescaping the JSON string) +func ParseString(vbytes []byte) (string, error) { + if vbytesUnesc, err := ParseStringAsBytes(vbytes); err != nil { + return "", err + } else { + return string(vbytesUnesc), nil + } +} + +// ParseNumber parses a Number ValueType into a Go float64 +func ParseNumber(vbytes []byte) (float64, error) { + if v, err := strconv.ParseFloat(unsafeBytesToString(vbytes), 64); err != nil { // TODO: use better BytesParseFloat in PR #25 + return 0, MalformedValueError + } else { + return v, nil + } +} diff --git a/parser_test.go b/parser_test.go index 52db538..38bb20c 100644 --- a/parser_test.go +++ b/parser_test.go @@ -26,7 +26,7 @@ func toStringArray(data []byte) (result []string) { return } -type Test struct { +type GetTest struct { desc string json string path []string @@ -37,121 +37,121 @@ type Test struct { data interface{} } -var getTests = []Test{ +var getTests = []GetTest{ // Found key tests - Test{ + GetTest{ desc: "handling multiple nested keys with same name", json: `{"a":[{"b":1},{"b":2},3],"c":{"c":[1,2]}} }`, path: []string{"c", "c"}, 
isFound: true, data: `[1,2]`, }, - Test{ + GetTest{ desc: "read basic key", json: `{"a":"b"}`, path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: "read basic key with space", json: `{"a": "b"}`, path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: "read composite key", json: `{"a": { "b":{"c":"d" }}}`, path: []string{"a", "b", "c"}, isFound: true, data: `d`, }, - Test{ + GetTest{ desc: `read numberic value as string`, json: `{"a": "b", "c": 1}`, path: []string{"c"}, isFound: true, data: `1`, }, - Test{ + GetTest{ desc: `handle multiple nested keys with same name`, json: `{"a":[{"b":1},{"b":2},3],"c":{"c":[1,2]}} }`, path: []string{"c", "c"}, isFound: true, data: `[1,2]`, }, - Test{ + GetTest{ desc: `read string values with quotes`, json: `{"a": "string\"with\"quotes"}`, path: []string{"a"}, isFound: true, data: `string\"with\"quotes`, }, - Test{ + GetTest{ desc: `read object`, json: `{"a": { "b":{"c":"d" }}}`, path: []string{"a", "b"}, isFound: true, data: `{"c":"d" }`, }, - Test{ + GetTest{ desc: `empty path`, json: `{"c":"d" }`, path: []string{}, isFound: true, data: `{"c":"d" }`, }, - Test{ + GetTest{ desc: `formatted JSON value`, json: "{\n \"a\": \"b\"\n}", path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: `formatted JSON value 2`, json: "{\n \"a\":\n {\n\"b\":\n {\"c\":\"d\",\n\"e\": \"f\"}\n}\n}", path: []string{"a", "b"}, isFound: true, data: "{\"c\":\"d\",\n\"e\": \"f\"}", }, - Test{ + GetTest{ desc: `whitespace`, json: " \n\r\t{ \n\r\t\"whitespace\" \n\r\t: \n\r\t333 \n\r\t} \n\r\t", path: []string{"whitespace"}, isFound: true, data: "333", }, - Test{ + GetTest{ desc: `escaped backslash quote`, json: `{"a": "\\\""}`, path: []string{"a"}, isFound: true, data: `\\\"`, }, - Test{ + GetTest{ desc: `unescaped backslash quote`, json: `{"a": "\\"}`, path: []string{"a"}, isFound: true, data: `\\`, }, - Test{ + GetTest{ desc: `unicode in JSON`, json: `{"a": "15°C"}`, path: []string{"a"}, 
isFound: true, data: `15°C`, }, - Test{ + GetTest{ desc: `no padding + nested`, json: `{"a":{"a":"1"},"b":2}`, path: []string{"b"}, isFound: true, data: `2`, }, - Test{ + GetTest{ desc: `no padding + nested + array`, json: `{"a":{"b":[1,2]},"c":3}`, path: []string{"c"}, @@ -160,21 +160,21 @@ var getTests = []Test{ }, // Escaped key tests - Test{ + GetTest{ desc: `key with simple escape`, json: `{"a\\b":1}`, path: []string{"a\\b"}, isFound: true, data: `1`, }, - Test{ + GetTest{ desc: `key with Unicode escape`, json: `{"a\u00B0b":1}`, path: []string{"a\u00B0b"}, isFound: true, data: `1`, }, - Test{ + GetTest{ desc: `key with complex escape`, json: `{"a\uD83D\uDE03b":1}`, path: []string{"a\U0001F603b"}, @@ -182,14 +182,14 @@ var getTests = []Test{ data: `1`, }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance desc: `malformed with trailing whitespace`, json: `{"a":1 `, path: []string{"a"}, isFound: true, data: `1`, }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance desc: `malformed with wrong closing bracket`, json: `{"a":1]`, path: []string{"a"}, @@ -198,42 +198,42 @@ var getTests = []Test{ }, // Not found key tests - Test{ + GetTest{ desc: "non-existent key 1", json: `{"a":"b"}`, path: []string{"c"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: "non-existent key 2", json: `{"a":"b"}`, path: []string{"b"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: "non-existent key 3", json: `{"aa":"b"}`, path: []string{"a"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: "apply scope of parent when search for nested key", json: `{"a": { "b": 1}, "c": 2 
}`, path: []string{"a", "b", "c"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: `apply scope to key level`, json: `{"a": { "b": 1}, "c": 2 }`, path: []string{"b"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: `handle escaped quote in key name in JSON`, json: `{"key\"key": 1}`, path: []string{"key"}, @@ -242,71 +242,71 @@ var getTests = []Test{ }, // Error/invalid tests - Test{ + GetTest{ desc: `handle escaped quote in key name in JSON`, json: `{"key\"key": 1}`, path: []string{"key"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: `missing closing brace, but can still find key`, json: `{"a":"b"`, path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: `missing value closing quote`, json: `{"a":"b`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value closing curly brace`, json: `{"a": { "b": "c"`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value closing square bracket`, json: `{"a": [1, 2, 3 }`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value 1`, json: `{"a":`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value 2`, json: `{"a": `, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value 3`, json: `{"a":}`, path: []string{"a"}, isErr: true, }, - Test{ // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance + GetTest{ // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance desc: "malformed key (followed by comma followed by colon)", json: `{"a",:1}`, path: []string{"a"}, isErr: true, }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) 
desc: "malformed 'colon chain', lookup first string", json: `{"a":"b":"c"}`, path: []string{"a"}, isFound: true, data: "b", }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) desc: "malformed 'colon chain', lookup second string", json: `{"a":"b":"c"}`, path: []string{"b"}, @@ -315,15 +315,15 @@ var getTests = []Test{ }, } -var getIntTests = []Test{ - Test{ +var getIntTests = []GetTest{ + GetTest{ desc: `read numeric value as number`, json: `{"a": "b", "c": 1}`, path: []string{"c"}, isFound: true, data: int64(1), }, - Test{ + GetTest{ desc: `read numeric value as number in formatted JSON`, json: "{\"a\": \"b\", \"c\": 1 \n}", path: []string{"c"}, @@ -332,15 +332,15 @@ var getIntTests = []Test{ }, } -var getFloatTests = []Test{ - Test{ +var getFloatTests = []GetTest{ + GetTest{ desc: `read numeric value as number`, json: `{"a": "b", "c": 1.123}`, path: []string{"c"}, isFound: true, data: float64(1.123), }, - Test{ + GetTest{ desc: `read numeric value as number in formatted JSON`, json: "{\"a\": \"b\", \"c\": 23.41323 \n}", path: []string{"c"}, @@ -349,29 +349,29 @@ var getFloatTests = []Test{ }, } -var getStringTests = []Test{ - Test{ +var getStringTests = []GetTest{ + GetTest{ desc: `Translate Unicode symbols`, json: `{"c": "test"}`, path: []string{"c"}, isFound: true, data: `test`, }, - Test{ + GetTest{ desc: `Translate Unicode symbols`, json: `{"c": "15\u00b0C"}`, path: []string{"c"}, isFound: true, data: `15°C`, }, - Test{ + GetTest{ desc: `Translate supplementary Unicode symbols`, json: `{"c": "\uD83D\uDE03"}`, // Smiley face (UTF16 surrogate pair) path: []string{"c"}, isFound: true, data: "\U0001F603", // Smiley face }, - Test{ + GetTest{ desc: `Translate escape symbols`, json: `{"c": "\\\""}`, path: []string{"c"}, @@ 
-380,48 +380,48 @@ var getStringTests = []Test{ }, } -var getBoolTests = []Test{ - Test{ +var getBoolTests = []GetTest{ + GetTest{ desc: `read boolean true as boolean`, json: `{"a": "b", "c": true}`, path: []string{"c"}, isFound: true, data: true, }, - Test{ + GetTest{ desc: `boolean true in formatted JSON`, json: "{\"a\": \"b\", \"c\": true \n}", path: []string{"c"}, isFound: true, data: true, }, - Test{ + GetTest{ desc: `read boolean false as boolean`, json: `{"a": "b", "c": false}`, path: []string{"c"}, isFound: true, data: false, }, - Test{ + GetTest{ desc: `boolean true in formatted JSON`, json: "{\"a\": \"b\", \"c\": false \n}", path: []string{"c"}, isFound: true, data: false, }, - Test{ + GetTest{ desc: `read fake boolean true`, json: `{"a": txyz}`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `read fake boolean false`, json: `{"a": fwxyz}`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `read boolean true with whitespace and another key`, json: "{\r\t\n \"a\"\r\t\n :\r\t\n true\r\t\n ,\r\t\n \"b\": 1}", path: []string{"a"}, @@ -430,29 +430,29 @@ var getBoolTests = []Test{ }, } -var getArrayTests = []Test{ - Test{ +var getArrayTests = []GetTest{ + GetTest{ desc: `read array of simple values`, json: `{"a": { "b":[1,2,3,4]}}`, path: []string{"a", "b"}, isFound: true, data: []string{`1`, `2`, `3`, `4`}, }, - Test{ + GetTest{ desc: `read array via empty path`, json: `[1,2,3,4]`, path: []string{}, isFound: true, data: []string{`1`, `2`, `3`, `4`}, }, - Test{ + GetTest{ desc: `read array of objects`, json: `{"a": { "b":[{"x":1},{"x":2},{"x":3},{"x":4}]}}`, path: []string{"a", "b"}, isFound: true, data: []string{`{"x":1}`, `{"x":2}`, `{"x":3}`, `{"x":4}`}, }, - Test{ + GetTest{ desc: `read nested array`, json: `{"a": [[[1]],[[2]]]}`, path: []string{"a"}, @@ -463,7 +463,7 @@ var getArrayTests = []Test{ // checkFoundAndNoError checks the dataType and error return from Get*() against the test case expectations. 
// Returns true the test should proceed to checking the actual data returned from Get*(), or false if the test is finished. -func checkFoundAndNoError(t *testing.T, testKind string, test Test, jtype ValueType, value interface{}, err error) bool { +func checkFoundAndNoError(t *testing.T, testKind string, test GetTest, jtype ValueType, value interface{}, err error) bool { isFound := (jtype != NotExist) isErr := (err != nil) @@ -491,7 +491,7 @@ func checkFoundAndNoError(t *testing.T, testKind string, test Test, jtype ValueT } } -func runTests(t *testing.T, tests []Test, runner func(Test) (interface{}, ValueType, error), typeChecker func(Test, interface{}) (bool, interface{})) { +func runTests(t *testing.T, tests []GetTest, runner func(GetTest) (interface{}, ValueType, error), typeChecker func(GetTest, interface{}) (bool, interface{})) { for _, test := range tests { if activeTest != "" && test.desc != activeTest { continue @@ -522,11 +522,11 @@ func runTests(t *testing.T, tests []Test, runner func(Test) (interface{}, ValueT func TestGet(t *testing.T) { runTests(t, getTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, dataType, _, err = Get([]byte(test.json), test.path...) return }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := []byte(test.data.(string)) return bytes.Equal(expected, value.([]byte)), expected }, @@ -535,11 +535,11 @@ func TestGet(t *testing.T) { func TestGetString(t *testing.T) { runTests(t, getStringTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetString([]byte(test.json), test.path...) 
return value, String, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(string) return expected == value.(string), expected }, @@ -548,11 +548,11 @@ func TestGetString(t *testing.T) { func TestGetInt(t *testing.T) { runTests(t, getIntTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetInt([]byte(test.json), test.path...) return value, Number, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(int64) return expected == value.(int64), expected }, @@ -561,11 +561,11 @@ func TestGetInt(t *testing.T) { func TestGetFloat(t *testing.T) { runTests(t, getFloatTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetFloat([]byte(test.json), test.path...) return value, Number, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(float64) return expected == value.(float64), expected }, @@ -574,11 +574,11 @@ func TestGetFloat(t *testing.T) { func TestGetBoolean(t *testing.T) { runTests(t, getBoolTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetBoolean([]byte(test.json), test.path...) 
return value, Boolean, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(bool) return expected == value.(bool), expected }, @@ -587,13 +587,87 @@ func TestGetBoolean(t *testing.T) { func TestGetSlice(t *testing.T) { runTests(t, getArrayTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, dataType, _, err = Get([]byte(test.json), test.path...) return }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.([]string) return reflect.DeepEqual(expected, toStringArray(value.([]byte))), expected }, ) } + +// +//type ParsePrimValTest struct { +// in string +// jtype ValueType +// out interface{} +// isErr bool +//} +// +//var parsePrimValTests = []ParsePrimValTest{ +// ParsePrimValTest{ +// in: `null`, +// jtype: Null, +// out: nil, +// }, +// ParsePrimValTest{ +// in: `true`, +// jtype: Boolean, +// out: true, +// }, +// ParsePrimValTest{ +// in: `false`, +// jtype: Boolean, +// out: false, +// }, +// ParsePrimValTest{ +// in: `0`, +// jtype: Number, +// out: float64(0), +// }, +// ParsePrimValTest{ +// in: `0.0`, +// jtype: Number, +// out: float64(0), +// }, +// ParsePrimValTest{ +// in: `-1.23e4`, +// jtype: Number, +// out: float64(-1.23e4), +// }, +// ParsePrimValTest{ +// in: ``, +// jtype: String, +// out: ``, +// }, +// ParsePrimValTest{ +// in: `abcde`, +// jtype: String, +// out: `abcde`, +// }, +// ParsePrimValTest{ // TODO: This may not be the behavior we want for ParsePrimitiveValue; we may want it to unescape the string +// in: `\"`, +// jtype: String, +// out: `\"`, +// }, +//} +// +//func TestParsePrimitiveValue(t *testing.T) { +// for _, test := range parsePrimValTests { +// out, err := ParsePrimitiveValue([]byte(test.in), test.jtype) +// isErr := (err != nil) 
+// +// if test.isErr != isErr { +// // If the call didn't match the error expectation, fail +// t.Errorf("Test '%s' (jtype %d) isErr mismatch: expected %t, obtained %t (err %v)", test.in, test.jtype, test.isErr, isErr, err) +// } else if isErr { +// // Else, if there was an error, don't fail and don't check anything further +// } else if reflect.TypeOf(out) != reflect.TypeOf(test.out) { +// t.Errorf("Test '%s' (jtype %d) output type mismatch: expected %T, obtained %T", test.in, test.jtype, test.out, out) +// } else if out != test.out { +// t.Errorf("Test '%s' (jtype %d) output value mismatch: expected %v, obtained %v", test.in, test.jtype, test.out, out) +// } +// } +//}