diff --git a/escape.go b/escape.go new file mode 100644 index 0000000..ec1c4cd --- /dev/null +++ b/escape.go @@ -0,0 +1,152 @@ +package jsonparser + +import ( + "bytes" + "unicode/utf8" +) + +// JSON Unicode stuff: see https://tools.ietf.org/html/rfc7159#section-7 + +const supplementalPlanesOffset = 0x10000 +const highSurrogateOffset = 0xD800 +const lowSurrogateOffset = 0xDC00 + +func combineUTF16Surrogates(high, low rune) rune { + return supplementalPlanesOffset + (high-highSurrogateOffset)<<10 + (low - lowSurrogateOffset) +} + +const badHex = -1 + +func h2I(c byte) int { + switch { + case c >= '0' && c <= '9': + return int(c - '0') + case c >= 'A' && c <= 'F': + return int(c - 'A' + 10) + case c >= 'a' && c <= 'f': + return int(c - 'a' + 10) + } + return badHex +} + +// decodeSingleUnicodeEscape decodes a single \uXXXX escape sequence. The prefix \u is assumed to be present and +// is not checked. +// In JSON, these escapes can either come alone or as part of "UTF16 surrogate pairs" that must be handled together. +// This function only handles one; decodeUnicodeEscape handles this more complex case. +func decodeSingleUnicodeEscape(in []byte) (rune, bool) { + // We need at least 6 characters total + if len(in) < 6 { + return utf8.RuneError, false + } + + // Convert hex to decimal + h1, h2, h3, h4 := h2I(in[2]), h2I(in[3]), h2I(in[4]), h2I(in[5]) + if h1 == badHex || h2 == badHex || h3 == badHex || h4 == badHex { + return utf8.RuneError, false + } + + // Compose the hex digits + return rune(h1<<12 + h2<<8 + h3<<4 + h4), true +} + +func decodeUnicodeEscape(in []byte) (rune, int) { + if r, ok := decodeSingleUnicodeEscape(in); !ok { + // Invalid Unicode escape + return utf8.RuneError, -1 + } else if r < highSurrogateOffset { + // Valid Unicode escape in Basic Multilingual Plane + return r, 6 + } else if r2, ok := decodeSingleUnicodeEscape(in[6:]); !ok { // Note: previous decodeSingleUnicodeEscape success guarantees at least 6 bytes remain + // UTF16 "high surrogate" without manditory valid following Unicode escape for the "low surrogate" + return utf8.RuneError, -1 + } else if r2 < lowSurrogateOffset { + // Invalid UTF16 "low surrogate" + return utf8.RuneError, -1 + } else { + // Valid UTF16 surrogate pair + return combineUTF16Surrogates(r, r2), 12 + } + +} + +// unescapeToUTF8 unescapes the single escape sequence starting at 'in' into 'out' and returns +// how many characters were consumed from 'in' and emitted into 'out'. +// If a valid escape sequence does not appear as a prefix of 'in', (-1, -1) to signal the error. +func unescapeToUTF8(in, out []byte) (inLen int, outLen int) { + if len(in) < 2 || in[0] != '\\' { + // Invalid escape due to insufficient characters for any escape or no initial backslash + return -1, -1 + } + + // https://tools.ietf.org/html/rfc7159#section-7 + switch e := in[1]; e { + case '"', '\\', 'n', 't', 'r', '/', 'b', 'f': + // Valid basic 2-character escapes + out[0] = e + return 2, 1 + case 'u': + // Unicode escape + if r, inLen := decodeUnicodeEscape(in); inLen == -1 { + // Invalid Unicode escape + return -1, -1 + } else { + // Valid Unicode escape; re-encode as UTF8 + outLen := utf8.EncodeRune(out, r) + return inLen, outLen + } + } + + return -1, -1 +} + +// unescape unescapes the string contained in 'in' and returns it as a slice. +// If 'in' contains no escaped characters: +// Returns 'in'. +// Else, if 'out' is of sufficient capacity (guaranteed if cap(out) >= len(in)): +// 'out' is used to build the unescaped string and is returned with no extra allocation +// Else: +// A new slice is allocated and returned. +func Unescape(in, out []byte) ([]byte, error) { + firstBackslash := bytes.IndexByte(in, '\\') + if firstBackslash == -1 { + return in, nil + } + + // Get a buffer of sufficient size (allocate if needed) + if cap(out) < len(in) { + out = make([]byte, len(in)) + } else { + out = out[0:len(in)] + } + + // Copy the first sequence of unescaped bytes to the output and obtain a buffer pointer (subslice) + copy(out, in[:firstBackslash]) + in = in[firstBackslash:] + buf := out[firstBackslash:] + + for len(in) > 0 { + // Unescape the next escaped character + inLen, bufLen := unescapeToUTF8(in, buf) + if inLen == -1 { + return nil, MalformedStringEscapeError + } + + in = in[inLen:] + buf = buf[bufLen:] + + // Copy everything up until the next backslash + nextBackslash := bytes.IndexByte(in, '\\') + if nextBackslash == -1 { + copy(buf, in) + buf = buf[len(in):] + break + } else { + copy(buf, in[:nextBackslash]) + buf = buf[nextBackslash:] + in = in[nextBackslash:] + } + } + + // Trim the out buffer to the amount that was actually emitted + return out[:len(out)-len(buf)], nil +} diff --git a/escape_test.go b/escape_test.go new file mode 100644 index 0000000..db18108 --- /dev/null +++ b/escape_test.go @@ -0,0 +1,189 @@ +package jsonparser + +import ( + "bytes" + "testing" +) + +func TestH2I(t *testing.T) { + hexChars := []byte{'0', '9', 'A', 'F', 'a', 'f', 'x', '\000'} + hexValues := []int{0, 9, 10, 15, 10, 15, -1, -1} + + for i, c := range hexChars { + if v := h2I(c); v != hexValues[i] { + t.Errorf("h2I('%c') returned wrong value (obtained %d, expected %d)", c, v, hexValues[i]) + } + } +} + +type escapedUnicodeRuneTest struct { + in string + isErr bool + out rune + len int +} + +var commonUnicodeEscapeTests = []escapedUnicodeRuneTest{ + {in: `\u0041`, out: 'A', len: 6}, + {in: `\u0000`, out: 0, len: 6}, + {in: `\u00b0`, out: '°', len: 6}, + {in: `\u00B0`, out: '°', len: 6}, + + {in: `\x1234`, out: 0x1234, len: 6}, // These functions do not check the \u prefix + + {in: ``, isErr: true}, + {in: `\`, isErr: true}, + {in: `\u`, isErr: true}, + {in: `\u1`, isErr: true}, + {in: `\u11`, isErr: true}, + {in: `\u111`, isErr: true}, + {in: `\u123X`, isErr: true}, +} + +var singleUnicodeEscapeTests = append([]escapedUnicodeRuneTest{ + {in: `\uD83D`, out: 0xD83D, len: 6}, + {in: `\uDE03`, out: 0xDE03, len: 6}, + {in: `\uFFFF`, out: 0xFFFF, len: 6}, +}, commonUnicodeEscapeTests...) + +var multiUnicodeEscapeTests = append([]escapedUnicodeRuneTest{ + {in: `\uD83D`, isErr: true}, + {in: `\uDE03`, isErr: true}, + {in: `\uFFFF`, isErr: true}, + + {in: `\uD83D\uDE03`, out: '\U0001F603', len: 12}, + {in: `\uD800\uDC00`, out: '\U00010000', len: 12}, + + {in: `\uD800\`, isErr: true}, + {in: `\uD800\u`, isErr: true}, + {in: `\uD800\uD`, isErr: true}, + {in: `\uD800\uDC`, isErr: true}, + {in: `\uD800\uDC0`, isErr: true}, + {in: `\uD800\uDBFF`, isErr: true}, // invalid low surrogate +}, commonUnicodeEscapeTests...) + +func TestDecodeSingleUnicodeEscape(t *testing.T) { + for _, test := range singleUnicodeEscapeTests { + r, ok := decodeSingleUnicodeEscape([]byte(test.in)) + isErr := !ok + + if isErr != test.isErr { + t.Errorf("decodeSingleUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t", test.in, test.isErr, isErr) + } else if isErr { + continue + } else if r != test.out { + t.Errorf("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", test.in, test.out, test.out, r, r) + } + } +} + +func TestDecodeUnicodeEscape(t *testing.T) { + for _, test := range multiUnicodeEscapeTests { + r, len := decodeUnicodeEscape([]byte(test.in)) + isErr := (len == -1) + + if isErr != test.isErr { + t.Errorf("decodeUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t", test.in, test.isErr, isErr) + } else if isErr { + continue + } else if len != test.len { + t.Errorf("decodeUnicodeEscape(%s) returned length mismatch: expected %d, obtained %d", test.in, test.len, len) + } else if r != test.out { + t.Errorf("decodeUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", test.in, test.out, test.out, r, r) + } + } +} + +type unescapeTest struct { + in string // escaped string + out string // expected unescaped string + canAlloc bool // can unescape cause an allocation (depending on buffer size)? true iff 'in' contains escape sequence(s) + isErr bool // should this operation result in an error +} + +var unescapeTests = []unescapeTest{ + {in: ``, out: ``, canAlloc: false}, + {in: `a`, out: `a`, canAlloc: false}, + {in: `abcde`, out: `abcde`, canAlloc: false}, + + {in: `ab\\de`, out: `ab\de`, canAlloc: true}, + {in: `ab\"de`, out: `ab"de`, canAlloc: true}, + {in: `ab \u00B0 de`, out: `ab ° de`, canAlloc: true}, + {in: `ab \uD83D\uDE03 de`, out: "ab \U0001F603 de", canAlloc: true}, + {in: `\u0000\u0000\u0000\u0000\u0000`, out: "\u0000\u0000\u0000\u0000\u0000", canAlloc: true}, + {in: `\u0000 \u0000 \u0000 \u0000 \u0000`, out: "\u0000 \u0000 \u0000 \u0000 \u0000", canAlloc: true}, + {in: ` \u0000 \u0000 \u0000 \u0000 \u0000 `, out: " \u0000 \u0000 \u0000 \u0000 \u0000 ", canAlloc: true}, + + {in: `\uD800`, isErr: true}, + {in: `\uFFFF`, isErr: true}, + {in: `abcde\`, isErr: true}, + {in: `abcde\x`, isErr: true}, + {in: `abcde\u`, isErr: true}, + {in: `abcde\u1`, isErr: true}, + {in: `abcde\u12`, isErr: true}, + {in: `abcde\u123`, isErr: true}, + {in: `abcde\uD800`, isErr: true}, + {in: `ab\uD800de`, isErr: true}, + {in: `\uD800abcde`, isErr: true}, +} + +// isSameMemory checks if two slices contain the same memory pointer (meaning one is a +// subslice of the other, with possibly differing lengths/capacities). +func isSameMemory(a, b []byte) bool { + if cap(a) == 0 || cap(b) == 0 { + return cap(a) == cap(b) + } else if a, b = a[:1], b[:1]; a[0] != b[0] { + return false + } else { + a[0]++ + same := (a[0] == b[0]) + a[0]-- + return same + } + +} + +func TestUnescape(t *testing.T) { + for _, test := range unescapeTests { + type bufferTestCase struct { + buf []byte + isTooSmall bool + } + + var bufs []bufferTestCase + + if len(test.in) == 0 { + // If the input string is length 0, only a buffer of size 0 is a meaningful test + bufs = []bufferTestCase{{nil, false}} + } else { + // For non-empty input strings, we can try several buffer sizes (0, len-1, len) + bufs = []bufferTestCase{ + {nil, true}, + {make([]byte, 0, len(test.in)-1), true}, + {make([]byte, 0, len(test.in)), false}, + } + } + + for _, buftest := range bufs { + in := []byte(test.in) + buf := buftest.buf + + out, err := Unescape(in, buf) + isErr := (err != nil) + isAlloc := !isSameMemory(out, in) && !isSameMemory(out, buf) + + if isErr != test.isErr { + t.Errorf("Unescape(`%s`, bufsize=%d) returned isErr mismatch: expected %t, obtained %t", test.in, cap(buf), test.isErr, isErr) + break + } else if isErr { + continue + } else if !bytes.Equal(out, []byte(test.out)) { + t.Errorf("Unescape(`%s`, bufsize=%d) returned unescaped mismatch: expected `%s` (%v, len %d), obtained `%s` (%v, len %d)", test.in, cap(buf), test.out, []byte(test.out), len(test.out), string(out), out, len(out)) + break + } else if isAlloc != (test.canAlloc && buftest.isTooSmall) { + t.Errorf("Unescape(`%s`, bufsize=%d) returned isAlloc mismatch: expected %t, obtained %t", test.in, cap(buf), buftest.isTooSmall, isAlloc) + break + } + } + } +} diff --git a/parser.go b/parser.go index 9bb51ed..a96d32f 100644 --- a/parser.go +++ b/parser.go @@ -11,15 +11,20 @@ import ( // Errors var ( - KeyPathNotFoundError = errors.New("Key path not found") - UnknownValueTypeError = errors.New("Unknown value type") - MalformedJsonError = errors.New("Malformed JSON error") - MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol") - MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol") - MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol") - MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol") + KeyPathNotFoundError = errors.New("Key path not found") + UnknownValueTypeError = errors.New("Unknown value type") + MalformedJsonError = errors.New("Malformed JSON error") + MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol") + MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol") + MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol") + MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol") + MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string") ) +// How much stack space to allocate for unescaping JSON strings; if a string longer +// than this needs to be escaped, it will result in a heap allocation +const unescapeStackBufSize = 64 + func tokenEnd(data []byte) int { for i, c := range data { switch c { @@ -31,7 +36,6 @@ func tokenEnd(data []byte) int { return -1 } - // Find position of next character which is not ' ', ',', '}' or ']' func nextToken(data []byte, skipComma bool) int { for i, c := range data { @@ -112,6 +116,8 @@ func searchKeys(data []byte, keys ...string) int { ln := len(data) lk := len(keys) + var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings + for i < ln { switch data[i] { case '"': @@ -133,15 +139,23 @@ func searchKeys(data []byte, keys ...string) int { i += valueOffset // if string is a Key, and key level match - if data[i] == ':'{ - key := unsafeBytesToString(data[keyBegin:keyEnd]) - - if keyLevel == level-1 && // If key nesting level match current object nested level - keys[level-1] == key { - keyLevel++ - // If we found all keys in path - if keyLevel == lk { - return i + 1 + if data[i] == ':' { + key := data[keyBegin:keyEnd] + + // for unescape: if there are no escape sequences, this is cheap; if there are, it is a + // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize + if keyUnesc, err := Unescape(key, stackbuf[:]); err != nil { + return -1 + } else { + keyUnescStr := unsafeBytesToString(keyUnesc) + + if keyLevel == level-1 && // If key nesting level match current object nested level + keys[level-1] == keyUnescStr { + keyLevel++ + // If we found all keys in path + if keyLevel == lk { + return i + 1 + } } } } else { @@ -392,9 +406,10 @@ func GetString(data []byte, keys ...string) (val string, err error) { return string(v), nil } - s, err := strconv.Unquote(`"` + unsafeBytesToString(v) + `"`) + var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings + out, err := Unescape(v, stackbuf[:]) - return s, err + return string(out), err } // GetFloat returns the value retrieved by `Get`, cast to a float64 if possible. @@ -462,3 +477,32 @@ func unsafeBytesToString(data []byte) string { sh := reflect.StringHeader{Data: h.Data, Len: h.Len} return *(*string)(unsafe.Pointer(&sh)) } + +// ParseBoolean parses a Boolean ValueType into a Go bool (not particularly useful, but here for completeness) +func ParseBoolean(vbytes []byte) bool { + return (vbytes[0] == 't') // assumes value is already validated by Get(), etc. as signaled by jtype == Boolean +} + +// ParseString parses a String ValueType into a Go []byte (the main parsing work is unescaping the JSON string) +func ParseStringAsBytes(vbytes []byte) ([]byte, error) { + var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings (hopefully; the Go compiler might just always kick stackbuf[:] into the heap) + return Unescape(vbytes, stackbuf[:]) +} + +// ParseString parses a String ValueType into a Go string (the main parsing work is unescaping the JSON string) +func ParseString(vbytes []byte) (string, error) { + if vbytesUnesc, err := ParseStringAsBytes(vbytes); err != nil { + return "", nil + } else { + return string(vbytesUnesc), nil + } +} + +// ParseNumber parses a Number ValueType into a Go float64 +func ParseNumber(vbytes []byte) (float64, error) { + if v, err := strconv.ParseFloat(unsafeBytesToString(vbytes), 64); err != nil { // TODO: use better BytesParseFloat in PR #25 + return 0, MalformedValueError + } else { + return v, nil + } +} diff --git a/parser_test.go b/parser_test.go index ba3f99d..38bb20c 100644 --- a/parser_test.go +++ b/parser_test.go @@ -26,7 +26,7 @@ func toStringArray(data []byte) (result []string) { return } -type Test struct { +type GetTest struct { desc string json string path []string @@ -37,135 +37,159 @@ type Test struct { data interface{} } -var getTests = []Test{ +var getTests = []GetTest{ // Found key tests - Test{ + GetTest{ desc: "handling multiple nested keys with same name", json: `{"a":[{"b":1},{"b":2},3],"c":{"c":[1,2]}} }`, path: []string{"c", "c"}, isFound: true, data: `[1,2]`, }, - Test{ + GetTest{ desc: "read basic key", json: `{"a":"b"}`, path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: "read basic key with space", json: `{"a": "b"}`, path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: "read composite key", json: `{"a": { "b":{"c":"d" }}}`, path: []string{"a", "b", "c"}, isFound: true, data: `d`, }, - Test{ + GetTest{ desc: `read numberic value as string`, json: `{"a": "b", "c": 1}`, path: []string{"c"}, isFound: true, data: `1`, }, - Test{ + GetTest{ desc: `handle multiple nested keys with same name`, json: `{"a":[{"b":1},{"b":2},3],"c":{"c":[1,2]}} }`, path: []string{"c", "c"}, isFound: true, data: `[1,2]`, }, - Test{ + GetTest{ desc: `read string values with quotes`, json: `{"a": "string\"with\"quotes"}`, path: []string{"a"}, isFound: true, data: `string\"with\"quotes`, }, - Test{ + GetTest{ desc: `read object`, json: `{"a": { "b":{"c":"d" }}}`, path: []string{"a", "b"}, isFound: true, data: `{"c":"d" }`, }, - Test{ + GetTest{ desc: `empty path`, json: `{"c":"d" }`, path: []string{}, isFound: true, data: `{"c":"d" }`, }, - Test{ + GetTest{ desc: `formatted JSON value`, json: "{\n \"a\": \"b\"\n}", path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: `formatted JSON value 2`, json: "{\n \"a\":\n {\n\"b\":\n {\"c\":\"d\",\n\"e\": \"f\"}\n}\n}", path: []string{"a", "b"}, isFound: true, data: "{\"c\":\"d\",\n\"e\": \"f\"}", }, - Test{ + GetTest{ desc: `whitespace`, json: " \n\r\t{ \n\r\t\"whitespace\" \n\r\t: \n\r\t333 \n\r\t} \n\r\t", path: []string{"whitespace"}, isFound: true, data: "333", }, - Test{ + GetTest{ desc: `escaped backslash quote`, json: `{"a": "\\\""}`, path: []string{"a"}, isFound: true, data: `\\\"`, }, - Test{ + GetTest{ desc: `unescaped backslash quote`, json: `{"a": "\\"}`, path: []string{"a"}, isFound: true, data: `\\`, }, - Test{ + GetTest{ desc: `unicode in JSON`, json: `{"a": "15°C"}`, path: []string{"a"}, isFound: true, data: `15°C`, }, - Test{ + GetTest{ desc: `no padding + nested`, json: `{"a":{"a":"1"},"b":2}`, path: []string{"b"}, isFound: true, data: `2`, }, - Test{ + GetTest{ desc: `no padding + nested + array`, json: `{"a":{"b":[1,2]},"c":3}`, path: []string{"c"}, isFound: true, data: `3`, }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance + + // Escaped key tests + GetTest{ + desc: `key with simple escape`, + json: `{"a\\b":1}`, + path: []string{"a\\b"}, + isFound: true, + data: `1`, + }, + GetTest{ + desc: `key with Unicode escape`, + json: `{"a\u00B0b":1}`, + path: []string{"a\u00B0b"}, + isFound: true, + data: `1`, + }, + GetTest{ + desc: `key with complex escape`, + json: `{"a\uD83D\uDE03b":1}`, + path: []string{"a\U0001F603b"}, + isFound: true, + data: `1`, + }, + + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance desc: `malformed with trailing whitespace`, json: `{"a":1 `, path: []string{"a"}, isFound: true, data: `1`, }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance desc: `malformed with wrong closing bracket`, json: `{"a":1]`, path: []string{"a"}, @@ -174,42 +198,42 @@ var getTests = []Test{ }, // Not found key tests - Test{ + GetTest{ desc: "non-existent key 1", json: `{"a":"b"}`, path: []string{"c"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: "non-existent key 2", json: `{"a":"b"}`, path: []string{"b"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: "non-existent key 3", json: `{"aa":"b"}`, path: []string{"a"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: "apply scope of parent when search for nested key", json: `{"a": { "b": 1}, "c": 2 }`, path: []string{"a", "b", "c"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: `apply scope to key level`, json: `{"a": { "b": 1}, "c": 2 }`, path: []string{"b"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: `handle escaped quote in key name in JSON`, json: `{"key\"key": 1}`, path: []string{"key"}, @@ -218,70 +242,71 @@ var getTests = []Test{ }, // Error/invalid tests - Test{ + GetTest{ desc: `handle escaped quote in key name in JSON`, json: `{"key\"key": 1}`, path: []string{"key"}, isFound: false, isErr: true, }, - Test{ + GetTest{ desc: `missing closing brace, but can still find key`, json: `{"a":"b"`, path: []string{"a"}, isFound: true, data: `b`, }, - Test{ + GetTest{ desc: `missing value closing quote`, json: `{"a":"b`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value closing curly brace`, json: `{"a": { "b": "c"`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value closing square bracket`, json: `{"a": [1, 2, 3 }`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value 1`, json: `{"a":`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value 2`, json: `{"a": `, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `missing value 3`, json: `{"a":}`, path: []string{"a"}, isErr: true, }, - Test{ // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance + + GetTest{ // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance desc: "malformed key (followed by comma followed by colon)", json: `{"a",:1}`, path: []string{"a"}, isErr: true, }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) desc: "malformed 'colon chain', lookup first string", json: `{"a":"b":"c"}`, path: []string{"a"}, isFound: true, data: "b", }, - Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) + GetTest{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance (this is not ideal) desc: "malformed 'colon chain', lookup second string", json: `{"a":"b":"c"}`, path: []string{"b"}, @@ -290,15 +315,15 @@ var getTests = []Test{ }, } -var getIntTests = []Test{ - Test{ +var getIntTests = []GetTest{ + GetTest{ desc: `read numeric value as number`, json: `{"a": "b", "c": 1}`, path: []string{"c"}, isFound: true, data: int64(1), }, - Test{ + GetTest{ desc: `read numeric value as number in formatted JSON`, json: "{\"a\": \"b\", \"c\": 1 \n}", path: []string{"c"}, @@ -307,15 +332,15 @@ var getIntTests = []Test{ }, } -var getFloatTests = []Test{ - Test{ +var getFloatTests = []GetTest{ + GetTest{ desc: `read numeric value as number`, json: `{"a": "b", "c": 1.123}`, path: []string{"c"}, isFound: true, data: float64(1.123), }, - Test{ + GetTest{ desc: `read numeric value as number in formatted JSON`, json: "{\"a\": \"b\", \"c\": 23.41323 \n}", path: []string{"c"}, @@ -324,22 +349,29 @@ var getFloatTests = []Test{ }, } -var getStringTests = []Test{ - Test{ - desc: `Translate unicode symbols`, +var getStringTests = []GetTest{ + GetTest{ + desc: `Translate Unicode symbols`, json: `{"c": "test"}`, path: []string{"c"}, isFound: true, data: `test`, }, - Test{ - desc: `Translate unicode symbols`, + GetTest{ + desc: `Translate Unicode symbols`, json: `{"c": "15\u00b0C"}`, path: []string{"c"}, isFound: true, data: `15°C`, }, - Test{ + GetTest{ + desc: `Translate supplementary Unicode symbols`, + json: `{"c": "\uD83D\uDE03"}`, // Smiley face (UTF16 surrogate pair) + path: []string{"c"}, + isFound: true, + data: "\U0001F603", // Smiley face + }, + GetTest{ desc: `Translate escape symbols`, json: `{"c": "\\\""}`, path: []string{"c"}, @@ -348,48 +380,48 @@ var getStringTests = []Test{ }, } -var getBoolTests = []Test{ - Test{ +var getBoolTests = []GetTest{ + GetTest{ desc: `read boolean true as boolean`, json: `{"a": "b", "c": true}`, path: []string{"c"}, isFound: true, data: true, }, - Test{ + GetTest{ desc: `boolean true in formatted JSON`, json: "{\"a\": \"b\", \"c\": true \n}", path: []string{"c"}, isFound: true, data: true, }, - Test{ + GetTest{ desc: `read boolean false as boolean`, json: `{"a": "b", "c": false}`, path: []string{"c"}, isFound: true, data: false, }, - Test{ + GetTest{ desc: `boolean true in formatted JSON`, json: "{\"a\": \"b\", \"c\": false \n}", path: []string{"c"}, isFound: true, data: false, }, - Test{ + GetTest{ desc: `read fake boolean true`, json: `{"a": txyz}`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `read fake boolean false`, json: `{"a": fwxyz}`, path: []string{"a"}, isErr: true, }, - Test{ + GetTest{ desc: `read boolean true with whitespace and another key`, json: "{\r\t\n \"a\"\r\t\n :\r\t\n true\r\t\n ,\r\t\n \"b\": 1}", path: []string{"a"}, @@ -398,29 +430,29 @@ var getBoolTests = []Test{ }, } -var getArrayTests = []Test{ - Test{ +var getArrayTests = []GetTest{ + GetTest{ desc: `read array of simple values`, json: `{"a": { "b":[1,2,3,4]}}`, path: []string{"a", "b"}, isFound: true, data: []string{`1`, `2`, `3`, `4`}, }, - Test{ + GetTest{ desc: `read array via empty path`, json: `[1,2,3,4]`, path: []string{}, isFound: true, data: []string{`1`, `2`, `3`, `4`}, }, - Test{ + GetTest{ desc: `read array of objects`, json: `{"a": { "b":[{"x":1},{"x":2},{"x":3},{"x":4}]}}`, path: []string{"a", "b"}, isFound: true, data: []string{`{"x":1}`, `{"x":2}`, `{"x":3}`, `{"x":4}`}, }, - Test{ + GetTest{ desc: `read nested array`, json: `{"a": [[[1]],[[2]]]}`, path: []string{"a"}, @@ -431,7 +463,7 @@ var getArrayTests = []Test{ // checkFoundAndNoError checks the dataType and error return from Get*() against the test case expectations. // Returns true the test should proceed to checking the actual data returned from Get*(), or false if the test is finished. -func checkFoundAndNoError(t *testing.T, testKind string, test Test, jtype ValueType, value interface{}, err error) bool { +func checkFoundAndNoError(t *testing.T, testKind string, test GetTest, jtype ValueType, value interface{}, err error) bool { isFound := (jtype != NotExist) isErr := (err != nil) @@ -459,7 +491,7 @@ func checkFoundAndNoError(t *testing.T, testKind string, test Test, jtype ValueT } } -func runTests(t *testing.T, tests []Test, runner func(Test) (interface{}, ValueType, error), typeChecker func(Test, interface{}) (bool, interface{})) { +func runTests(t *testing.T, tests []GetTest, runner func(GetTest) (interface{}, ValueType, error), typeChecker func(GetTest, interface{}) (bool, interface{})) { for _, test := range tests { if activeTest != "" && test.desc != activeTest { continue @@ -490,11 +522,11 @@ func runTests(t *testing.T, tests []Test, runner func(Test) (interface{}, ValueT func TestGet(t *testing.T) { runTests(t, getTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, dataType, _, err = Get([]byte(test.json), test.path...) return }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := []byte(test.data.(string)) return bytes.Equal(expected, value.([]byte)), expected }, @@ -503,11 +535,11 @@ func TestGet(t *testing.T) { func TestGetString(t *testing.T) { runTests(t, getStringTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetString([]byte(test.json), test.path...) return value, String, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(string) return expected == value.(string), expected }, @@ -516,11 +548,11 @@ func TestGetString(t *testing.T) { func TestGetInt(t *testing.T) { runTests(t, getIntTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetInt([]byte(test.json), test.path...) return value, Number, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(int64) return expected == value.(int64), expected }, @@ -529,11 +561,11 @@ func TestGetInt(t *testing.T) { func TestGetFloat(t *testing.T) { runTests(t, getFloatTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetFloat([]byte(test.json), test.path...) return value, Number, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(float64) return expected == value.(float64), expected }, @@ -542,11 +574,11 @@ func TestGetFloat(t *testing.T) { func TestGetBoolean(t *testing.T) { runTests(t, getBoolTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, err = GetBoolean([]byte(test.json), test.path...) return value, Boolean, err }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.(bool) return expected == value.(bool), expected }, @@ -555,13 +587,87 @@ func TestGetBoolean(t *testing.T) { func TestGetSlice(t *testing.T) { runTests(t, getArrayTests, - func(test Test) (value interface{}, dataType ValueType, err error) { + func(test GetTest) (value interface{}, dataType ValueType, err error) { value, dataType, _, err = Get([]byte(test.json), test.path...) return }, - func(test Test, value interface{}) (bool, interface{}) { + func(test GetTest, value interface{}) (bool, interface{}) { expected := test.data.([]string) return reflect.DeepEqual(expected, toStringArray(value.([]byte))), expected }, ) } + +// +//type ParsePrimValTest struct { +// in string +// jtype ValueType +// out interface{} +// isErr bool +//} +// +//var parsePrimValTests = []ParsePrimValTest{ +// ParsePrimValTest{ +// in: `null`, +// jtype: Null, +// out: nil, +// }, +// ParsePrimValTest{ +// in: `true`, +// jtype: Boolean, +// out: true, +// }, +// ParsePrimValTest{ +// in: `false`, +// jtype: Boolean, +// out: false, +// }, +// ParsePrimValTest{ +// in: `0`, +// jtype: Number, +// out: float64(0), +// }, +// ParsePrimValTest{ +// in: `0.0`, +// jtype: Number, +// out: float64(0), +// }, +// ParsePrimValTest{ +// in: `-1.23e4`, +// jtype: Number, +// out: float64(-1.23e4), +// }, +// ParsePrimValTest{ +// in: ``, +// jtype: String, +// out: ``, +// }, +// ParsePrimValTest{ +// in: `abcde`, +// jtype: String, +// out: `abcde`, +// }, +// ParsePrimValTest{ // TODO: This may not be the behavior we want for ParsePrimitiveValue; we may want it to unescape the string +// in: `\"`, +// jtype: String, +// out: `\"`, +// }, +//} +// +//func TestParsePrimitiveValue(t *testing.T) { +// for _, test := range parsePrimValTests { +// out, err := ParsePrimitiveValue([]byte(test.in), test.jtype) +// isErr := (err != nil) +// +// if test.isErr != isErr { +// // If the call didn't match the error expectation, fail +// t.Errorf("Test '%s' (jtype %d) isErr mismatch: expected %t, obtained %t (err %v)", test.in, test.jtype, test.isErr, isErr, err) +// } else if isErr { +// // Else, if there was an error, don't fail and don't check anything further +// } else if reflect.TypeOf(out) != reflect.TypeOf(test.out) { +// t.Errorf("Test '%s' (jtype %d) output type mismatch: expected %T, obtained %T", test.in, test.jtype, test.out, out) +// } else if out != test.out { +// t.Errorf("Test '%s' (jtype %d) output value mismatch: expected %v, obtained %v", test.in, test.jtype, test.out, out) +// } +// } +//}