Skip to content

Commit 9163f06

Browse files
committed
searchKeys and GetString now handle escaped strings
1 parent 16b80d0 commit 9163f06

File tree

4 files changed

+83
-27
lines changed

4 files changed

+83
-27
lines changed

bytes_safe.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ func parseFloat(b *[]byte) (float64, error) {
1717
}
1818

1919
func bytesToString(b *[]byte) string {
20-
return string(*b)
21-
}
20+
return string(*b)
21+
}

bytes_unsafe.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,5 @@ func parseFloat(b *[]byte) (float64, error) {
2727
// A hack until issue golang/go#2632 is fixed.
2828
// See: https://github.com/golang/go/issues/2632
2929
func bytesToString(b *[]byte) string {
30-
return *(*string)(unsafe.Pointer(b))
31-
}
30+
return *(*string)(unsafe.Pointer(b))
31+
}

parser.go

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"bytes"
55
"errors"
66
"fmt"
7-
"strconv"
87
)
98

109
// Errors
@@ -19,6 +18,10 @@ var (
1918
MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string")
2019
)
2120

21+
// How much stack space to allocate for unescaping JSON strings; if a string longer
22+
// than this needs to be unescaped, it will result in a heap allocation
23+
const unescapeStackBufSize = 64
24+
2225
func tokenEnd(data []byte) int {
2326
for i, c := range data {
2427
switch c {
@@ -46,24 +49,32 @@ func nextToken(data []byte) int {
4649

4750
// Tries to find the end of string
4851
// Supports strings that contain escaped quote symbols.
49-
func stringEnd(data []byte) int {
52+
func stringEnd(data []byte) (int, bool) {
53+
escaped := false
5054
for i, c := range data {
5155
if c == '"' {
52-
j := i - 1
53-
for {
54-
if j < 0 || data[j] != '\\' {
55-
return i + 1 // even number of backslashes
56-
}
57-
j--
58-
if j < 0 || data[j] != '\\' {
59-
break // odd number of backslashes
56+
if !escaped {
57+
return i + 1, false
58+
} else {
59+
j := i - 1
60+
for {
61+
if j < 0 || data[j] != '\\' {
62+
return i + 1, true // even number of backslashes
63+
}
64+
j--
65+
if j < 0 || data[j] != '\\' {
66+
break // odd number of backslashes
67+
}
68+
j--
69+
6070
}
61-
j--
6271
}
72+
} else if c == '\\' {
73+
escaped = true
6374
}
6475
}
6576

66-
return -1
77+
return -1, escaped
6778
}
6879

6980
// Find end of the data structure, array or object.
@@ -76,7 +87,7 @@ func blockEnd(data []byte, openSym byte, closeSym byte) int {
7687
for i < ln {
7788
switch data[i] {
7889
case '"': // If inside string, skip it
79-
se := stringEnd(data[i+1:])
90+
se, _ := stringEnd(data[i+1:])
8091
if se == -1 {
8192
return -1
8293
}
@@ -104,13 +115,15 @@ func searchKeys(data []byte, keys ...string) int {
104115
ln := len(data)
105116
lk := len(keys)
106117

118+
var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
119+
107120
for i < ln {
108121
switch data[i] {
109122
case '"':
110123
i++
111124
keyBegin := i
112125

113-
strEnd := stringEnd(data[i:])
126+
strEnd, keyEscaped := stringEnd(data[i:])
114127
if strEnd == -1 {
115128
return -1
116129
}
@@ -124,12 +137,22 @@ func searchKeys(data []byte, keys ...string) int {
124137

125138
i += valueOffset
126139

127-
// if string is a Key, and key level match
128-
if data[i] == ':' {
140+
// if string is a key, and key level match
141+
if data[i] == ':' && keyLevel == level-1 {
129142
key := data[keyBegin:keyEnd]
130143

131-
if keyLevel == level-1 && // If key nesting level match current object nested level
132-
equalStr(&key, keys[level-1]) {
144+
// for unescape: if there are no escape sequences, this is cheap; if there are, it is a
145+
// bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
146+
var keyUnesc []byte
147+
if !keyEscaped {
148+
keyUnesc = key
149+
} else if ku, err := Unescape(key, stackbuf[:]); err != nil {
150+
return -1
151+
} else {
152+
keyUnesc = ku
153+
}
154+
155+
if equalStr(&keyUnesc, keys[level-1]) {
133156
keyLevel++
134157
// If we found all keys in path
135158
if keyLevel == lk {
@@ -206,7 +229,7 @@ func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset
206229
// if string value
207230
if data[offset] == '"' {
208231
dataType = String
209-
if idx := stringEnd(data[offset+1:]); idx != -1 {
232+
if idx, _ := stringEnd(data[offset+1:]); idx != -1 {
210233
endOffset += idx + 1
211234
} else {
212235
return []byte{}, dataType, offset, MalformedStringError
@@ -371,9 +394,10 @@ func GetString(data []byte, keys ...string) (val string, err error) {
371394
return string(v), nil
372395
}
373396

374-
s, err := strconv.Unquote(`"` + string(v) + `"`)
397+
var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
398+
out, err := Unescape(v, stackbuf[:])
375399

376-
return s, err
400+
return string(out), err
377401
}
378402

379403
// GetFloat returns the value retrieved by `Get`, cast to a float64 if possible.

parser_test.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,30 @@ var getTests = []Test{
158158
isFound: true,
159159
data: `3`,
160160
},
161+
162+
// Escaped key tests
163+
Test{
164+
desc: `key with simple escape`,
165+
json: `{"a\\b":1}`,
166+
path: []string{"a\\b"},
167+
isFound: true,
168+
data: `1`,
169+
},
170+
Test{
171+
desc: `key with Unicode escape`,
172+
json: `{"a\u00B0b":1}`,
173+
path: []string{"a\u00B0b"},
174+
isFound: true,
175+
data: `1`,
176+
},
177+
Test{
178+
desc: `key with complex escape`,
179+
json: `{"a\uD83D\uDE03b":1}`,
180+
path: []string{"a\U0001F603b"},
181+
isFound: true,
182+
data: `1`,
183+
},
184+
161185
Test{ // This test returns a match instead of a parse error, as checking for the malformed JSON would reduce performance
162186
desc: `malformed with trailing whitespace`,
163187
json: `{"a":1 `,
@@ -268,6 +292,7 @@ var getTests = []Test{
268292
path: []string{"a"},
269293
isErr: true,
270294
},
295+
271296
Test{ // This test returns not found instead of a parse error, as checking for the malformed JSON would reduce performance
272297
desc: "malformed key (followed by comma followed by colon)",
273298
json: `{"a",:1}`,
@@ -326,19 +351,26 @@ var getFloatTests = []Test{
326351

327352
var getStringTests = []Test{
328353
Test{
329-
desc: `Translate unicode symbols`,
354+
desc: `Translate Unicode symbols`,
330355
json: `{"c": "test"}`,
331356
path: []string{"c"},
332357
isFound: true,
333358
data: `test`,
334359
},
335360
Test{
336-
desc: `Translate unicode symbols`,
361+
desc: `Translate Unicode symbols`,
337362
json: `{"c": "15\u00b0C"}`,
338363
path: []string{"c"},
339364
isFound: true,
340365
data: `15°C`,
341366
},
367+
Test{
368+
desc: `Translate supplementary Unicode symbols`,
369+
json: `{"c": "\uD83D\uDE03"}`, // Smiley face (UTF16 surrogate pair)
370+
path: []string{"c"},
371+
isFound: true,
372+
data: "\U0001F603", // Smiley face
373+
},
342374
Test{
343375
desc: `Translate escape symbols`,
344376
json: `{"c": "\\\""}`,

0 commit comments

Comments
 (0)