Skip to content

Commit e538c2b

Browse files
authored
Merge pull request #2 from klondikedragon/proper-escaping-and-deepcopy
Proper string escaping and arena deepcopy
2 parents e52cb40 + 2cd80f2 commit e538c2b

File tree

5 files changed

+245
-41
lines changed

5 files changed

+245
-41
lines changed

arena.go

+68-5
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
package fastjson
22

33
import (
4+
"fmt"
45
"strconv"
56
)
67

78
// Arena may be used for fast creation and re-use of Values.
89
//
910
// Typical Arena lifecycle:
1011
//
11-
// 1) Construct Values via the Arena and Value.Set* calls.
12-
// 2) Marshal the constructed Values with Value.MarshalTo call.
13-
// 3) Reset all the constructed Values at once by Arena.Reset call.
14-
// 4) Go to 1 and re-use the Arena.
12+
// 1. Construct Values via the Arena and Value.Set* calls.
13+
// 2. Marshal the constructed Values with Value.MarshalTo call.
14+
// 3. Reset all the constructed Values at once by Arena.Reset call.
15+
// 4. Go to 1 and re-use the Arena.
1516
//
1617
// It is unsafe calling Arena methods from concurrent goroutines.
1718
// Use per-goroutine Arenas or ArenaPool instead.
@@ -52,7 +53,10 @@ func (a *Arena) NewArray() *Value {
5253
return v
5354
}
5455

55-
// NewString returns new string value containing s.
56+
// NewString returns new string value containing s. The data in
57+
// s is assumed to have a valid permanent lifetime. Use the
58+
// NewStringBytes function if the string data needs to be copied
59+
// into the arena.
5660
//
5761
// The returned string is valid until Reset is called on a.
5862
func (a *Arena) NewString(s string) *Value {
@@ -110,6 +114,25 @@ func (a *Arena) NewNumberString(s string) *Value {
110114
return v
111115
}
112116

117+
// NewNumberStringBytes returns a new number value containing b.
118+
//
119+
// The returned number is valid until Reset is called on a.
120+
func (a *Arena) NewNumberStringBytes(b []byte) *Value {
121+
v := a.c.getValue()
122+
v.t = TypeNumber
123+
v.s = a.AllocateStringFromStringBytes(b)
124+
return v
125+
}
126+
127+
// Allocates room for the string from the arena for the string from the given string bytes
128+
//
129+
// The returned string is only valid until Reset is called on a.
130+
func (a *Arena) AllocateStringFromStringBytes(b []byte) string {
131+
bLen := len(a.b)
132+
a.b = append(a.b, b...)
133+
return b2s(a.b[bLen:])
134+
}
135+
113136
// NewNull returns null value.
114137
func (a *Arena) NewNull() *Value {
115138
return valueNull
@@ -124,3 +147,43 @@ func (a *Arena) NewTrue() *Value {
124147
func (a *Arena) NewFalse() *Value {
125148
return valueFalse
126149
}
150+
151+
// Makes a deep copy of a value using this Arena as the memory source.
152+
// Useful when you want to copy a value with a shorter lifetime (for
153+
// example, a value produced by a Scanner) to an Arena with a longer
154+
// lifetime.
155+
func (a *Arena) DeepCopyValue(v *Value) *Value {
156+
t := v.Type()
157+
switch t {
158+
case TypeObject:
159+
o := v.GetObject()
160+
newObject := a.NewObject()
161+
o.Visit(func(key []byte, vv *Value) {
162+
newObject.Set(a.AllocateStringFromStringBytes(key), a.DeepCopyValue(vv))
163+
})
164+
return newObject
165+
case TypeArray:
166+
entries := v.GetArray()
167+
newArray := a.NewArray()
168+
for i, vv := range entries {
169+
newItem := (*Value)(nil)
170+
if vv != nil {
171+
newItem = a.DeepCopyValue(vv)
172+
}
173+
newArray.SetArrayItem(i, newItem)
174+
}
175+
return newArray
176+
case TypeString:
177+
return a.NewStringBytes(v.GetStringBytes())
178+
case TypeNumber:
179+
return a.NewNumberStringBytes(v.GetNumberAsStringBytes())
180+
case TypeTrue:
181+
return a.NewTrue()
182+
case TypeFalse:
183+
return a.NewFalse()
184+
case TypeNull:
185+
return a.NewNull()
186+
default:
187+
panic(fmt.Errorf("BUG: unknown Value type: %d", t))
188+
}
189+
}

arena_test.go

+71-9
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,37 @@ package fastjson
22

33
import (
44
"fmt"
5+
"math/rand"
56
"testing"
67
"time"
78
)
89

9-
func TestArena(t *testing.T) {
10+
// Generic test driver that tests both serial and concurrent executions of the test with a given arena
11+
func testArenaDriver(t *testing.T, iterations int, doTest func(a *Arena) error) {
12+
t.Helper()
1013
t.Run("serial", func(t *testing.T) {
1114
var a Arena
12-
for i := 0; i < 10; i++ {
13-
if err := testArena(&a); err != nil {
15+
for i := 0; i < iterations; i++ {
16+
if err := doTest(&a); err != nil {
1417
t.Fatal(err)
1518
}
1619
a.Reset()
1720
}
1821
})
1922
t.Run("concurrent", func(t *testing.T) {
2023
var ap ArenaPool
21-
workers := 4
24+
workers := 128
2225
ch := make(chan error, workers)
2326
for i := 0; i < workers; i++ {
2427
go func() {
2528
a := ap.Get()
26-
defer ap.Put(a)
29+
defer func() {
30+
a.Reset()
31+
ap.Put(a)
32+
}()
2733
var err error
28-
for i := 0; i < 10; i++ {
29-
if err = testArena(a); err != nil {
34+
for i := 0; i < iterations; i++ {
35+
if err = doTest(a); err != nil {
3036
break
3137
}
3238
}
@@ -39,13 +45,17 @@ func TestArena(t *testing.T) {
3945
if err != nil {
4046
t.Fatal(err)
4147
}
42-
case <-time.After(time.Second):
48+
case <-time.After(time.Second * 3):
4349
t.Fatalf("timeout")
4450
}
4551
}
4652
})
4753
}
4854

55+
func TestArena(t *testing.T) {
56+
testArenaDriver(t, 1000, func(a *Arena) error { return testArena(a) })
57+
}
58+
4959
func testArena(a *Arena) error {
5060
o := a.NewObject()
5161
o.Set("nil1", a.NewNull())
@@ -56,6 +66,7 @@ func testArena(a *Arena) error {
5666
o.Set("ni", ni)
5767
o.Set("nf", a.NewNumberFloat64(1.23))
5868
o.Set("ns", a.NewNumberString("34.43"))
69+
o.Set("nbs", a.NewNumberStringBytes(s2b("-98.765")))
5970
s := a.NewString("foo")
6071
o.Set("str1", s)
6172
o.Set("str2", a.NewStringBytes([]byte("xx")))
@@ -69,9 +80,60 @@ func testArena(a *Arena) error {
6980
o.Set("obj", obj)
7081

7182
str := o.String()
72-
strExpected := `{"nil1":null,"nil2":null,"false":false,"true":true,"ni":123,"nf":1.23,"ns":34.43,"str1":"foo","str2":"xx","a":["foo",123],"obj":{"s":"foo"}}`
83+
strExpected := `{"nil1":null,"nil2":null,"false":false,"true":true,"ni":123,"nf":1.23,"ns":34.43,"nbs":-98.765,"str1":"foo","str2":"xx","a":["foo",123],"obj":{"s":"foo"}}`
7384
if str != strExpected {
7485
return fmt.Errorf("unexpected json\ngot\n%s\nwant\n%s", str, strExpected)
7586
}
7687
return nil
7788
}
89+
90+
func TestArenaDeepCopyValue(t *testing.T) {
91+
testArenaDriver(t, 100, func(a *Arena) error { return testArenaDeepCopyValue(a) })
92+
}
93+
94+
func randValidChar() rune {
95+
for {
96+
c := rune('0' + rand.Intn(78))
97+
if c != '\\' {
98+
return c
99+
}
100+
}
101+
}
102+
103+
func testArenaDeepCopyValue(a *Arena) error {
104+
const jsonTest = `{"nil":null,"false":false,"true":true,"ni":123,"nf":1.23,"ns":34.43,"nbs":-98.765,"str1":"foo","str2":"xx","a":["foo",123,{"s":"x","n":-1.0,"o":{},"nil":null}],"obj":{"s":"foo","a":[123,"s",{"f":{"f2":"v","f3":{"a":98}}}]}}`
105+
// Use a locally controlled parser that we'll reset and reuse to ensure the deep copy truly copied all the values
106+
var p Parser
107+
tempValue, err := p.Parse(jsonTest)
108+
if err != nil {
109+
return fmt.Errorf("failed to parse test json: %w", err)
110+
}
111+
// Validate that the serialized value matches the original test
112+
tempSerialized := b2s(tempValue.MarshalTo(nil))
113+
if tempSerialized != jsonTest {
114+
return fmt.Errorf("initial parsed test JSON does not match\ngot\n%s\nwant\n%s", tempSerialized, jsonTest)
115+
}
116+
// Do a deep copy to preserve the values after the parser is reused
117+
shallowCopy := tempValue
118+
deepCopy := a.DeepCopyValue(tempValue)
119+
// Now reuse the parser enough times so it should trash a shallow copy
120+
for i := 0; i < 100; i++ {
121+
rn := rand.Int63n(2 ^ 40)
122+
rs := fmt.Sprintf("%c%d%d%c", randValidChar(), rand.Int63(), rand.Int63(), randValidChar())
123+
mixerJSON := fmt.Sprintf(`{"n1":%d,"s1":"%s","o1":{"a1":[%d,true,%d,false,"%s",{"f1":%d,"f2":[%d,%d,[%d,"%s"]]}]}}`, rn, rs, rn, rn, rs, rn, rn, rn, rn, rs)
124+
_, err = p.Parse(mixerJSON)
125+
if err != nil {
126+
return fmt.Errorf("failed reusing parser to parser random JSON: %w\nJSON\n%s", err, mixerJSON)
127+
}
128+
}
129+
// Now check that the deep copy is good and the shallow copy is bad
130+
deepCopyJSON := b2s(deepCopy.MarshalTo(nil))
131+
if deepCopyJSON != jsonTest {
132+
return fmt.Errorf("deep copy JSON does not match\ngot\n%s\nwant\n%s", deepCopyJSON, jsonTest)
133+
}
134+
shallowCopyJSON := b2s(shallowCopy.MarshalTo(nil))
135+
if shallowCopyJSON == jsonTest {
136+
return fmt.Errorf("shallow copy JSON matches when it should not match!\nshallow_copy\n%s", shallowCopyJSON)
137+
}
138+
return nil
139+
}

parser.go

+47-20
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ package fastjson
22

33
import (
44
"fmt"
5-
"github.com/aperturerobotics/fastjson/fastfloat"
65
"strconv"
76
"strings"
87
"unicode/utf16"
8+
9+
"github.com/aperturerobotics/fastjson/fastfloat"
910
)
1011

1112
// Parser parses JSON.
@@ -327,28 +328,41 @@ func parseObject(s string, c *cache, depth int) (*Value, string, error) {
327328
}
328329

329330
func escapeString(dst []byte, s string) []byte {
330-
if !hasSpecialChars(s) {
331-
// Fast path - nothing to escape.
332-
dst = append(dst, '"')
333-
dst = append(dst, s...)
334-
dst = append(dst, '"')
335-
return dst
336-
}
337-
338-
// Slow path.
339-
return strconv.AppendQuote(dst, s)
340-
}
341-
342-
func hasSpecialChars(s string) bool {
343-
if strings.IndexByte(s, '"') >= 0 || strings.IndexByte(s, '\\') >= 0 {
344-
return true
345-
}
331+
dst = append(dst, '"')
346332
for i := 0; i < len(s); i++ {
347-
if s[i] < 0x20 {
348-
return true
333+
c := s[i]
334+
switch {
335+
case c == '"':
336+
// quotation mark
337+
dst = append(dst, []byte{'\\', '"'}...)
338+
case c == '\\':
339+
// reverse solidus
340+
dst = append(dst, []byte{'\\', '\\'}...)
341+
case c >= 0x20:
342+
// default, rest below are control chars
343+
dst = append(dst, c)
344+
case c == 0x08:
345+
dst = append(dst, []byte{'\\', 'b'}...)
346+
case c < 0x09:
347+
dst = append(dst, []byte{'\\', 'u', '0', '0', '0', '0' + c}...)
348+
case c == 0x09:
349+
dst = append(dst, []byte{'\\', 't'}...)
350+
case c == 0x0a:
351+
dst = append(dst, []byte{'\\', 'n'}...)
352+
case c == 0x0c:
353+
dst = append(dst, []byte{'\\', 'f'}...)
354+
case c == 0x0d:
355+
dst = append(dst, []byte{'\\', 'r'}...)
356+
case c < 0x10:
357+
dst = append(dst, []byte{'\\', 'u', '0', '0', '0', 0x57 + c}...)
358+
case c < 0x1a:
359+
dst = append(dst, []byte{'\\', 'u', '0', '0', '1', 0x20 + c}...)
360+
case c < 0x20:
361+
dst = append(dst, []byte{'\\', 'u', '0', '0', '1', 0x47 + c}...)
349362
}
350363
}
351-
return false
364+
dst = append(dst, '"')
365+
return dst
352366
}
353367

354368
func unescapeStringBestEffort(s string) string {
@@ -886,6 +900,19 @@ func (v *Value) GetUint64(keys ...string) uint64 {
886900
return fastfloat.ParseUint64BestEffort(v.s)
887901
}
888902

903+
// GetNumberAsStringBytes returns string representation of the numeric value by the given keys path.
904+
//
905+
// Array indexes may be represented as decimal numbers in keys.
906+
//
907+
// nil is returned for non-existing keys path or for invalid value type.
908+
func (v *Value) GetNumberAsStringBytes(keys ...string) []byte {
909+
v = v.Get(keys...)
910+
if v == nil || v.Type() != TypeNumber {
911+
return nil
912+
}
913+
return s2b(v.s)
914+
}
915+
889916
// GetStringBytes returns string value by the given keys path.
890917
//
891918
// Array indexes may be represented as decimal numbers in keys.

0 commit comments

Comments
 (0)