1
1
package jsonparser
2
2
3
3
import (
4
+ "bytes"
5
+ "fmt"
4
6
"testing"
5
7
)
6
8
@@ -15,37 +17,199 @@ func TestH2I(t *testing.T) {
15
17
}
16
18
}
17
19
20
// escapedUnicodeRuneTest is one table-driven case for the \uXXXX escape
// decoders exercised in this file.
type escapedUnicodeRuneTest struct {
	in    string // escaped input handed to the decoder
	isErr bool   // true when the decoder is expected to reject the input
	out   rune   // expected decoded rune; only compared when isErr is false
	len   int    // expected length result from decodeUnicodeEscape; only compared when isErr is false
}
26
+
27
+ var commonUnicodeEscapeTests = []escapedUnicodeRuneTest {
28
+ {in : `\u0041` , out : 'A' , len : 6 },
29
+ {in : `\u0000` , out : 0 , len : 6 },
30
+ {in : `\u00b0` , out : '°' , len : 6 },
31
+ {in : `\u00B0` , out : '°' , len : 6 },
32
+
33
+ {in : `\x1234` , out : 0x1234 , len : 6 }, // These functions do not check the \u prefix
34
+
35
+ {in : `` , isErr : true },
36
+ {in : `\` , isErr : true },
37
+ {in : `\u` , isErr : true },
38
+ {in : `\u1` , isErr : true },
39
+ {in : `\u11` , isErr : true },
40
+ {in : `\u111` , isErr : true },
41
+ {in : `\u123X` , isErr : true },
42
+ }
43
+
44
+ var singleUnicodeEscapeTests = append ([]escapedUnicodeRuneTest {
45
+ {in : `\uD83D` , out : 0xD83D , len : 6 },
46
+ {in : `\uDE03` , out : 0xDE03 , len : 6 },
47
+ {in : `\uFFFF` , out : 0xFFFF , len : 6 },
48
+ }, commonUnicodeEscapeTests ... )
49
+
50
+ var multiUnicodeEscapeTests = append ([]escapedUnicodeRuneTest {
51
+ {in : `\uD83D` , isErr : true },
52
+ {in : `\uDE03` , isErr : true },
53
+ {in : `\uFFFF` , isErr : true },
54
+
55
+ {in : `\uD83D\uDE03` , out : '\U0001F603' , len : 12 },
56
+ {in : `\uD800\uDC00` , out : '\U00010000' , len : 12 },
57
+
58
+ {in : `\uD800\` , isErr : true },
59
+ {in : `\uD800\u` , isErr : true },
60
+ {in : `\uD800\uD` , isErr : true },
61
+ {in : `\uD800\uDC` , isErr : true },
62
+ {in : `\uD800\uDC0` , isErr : true },
63
+ }, commonUnicodeEscapeTests ... )
64
+
18
65
func TestDecodeSingleUnicodeEscape (t * testing.T ) {
19
- escapeSequences := []string {
20
- `\"` ,
21
- `\\` ,
22
- `\n` ,
23
- `\t` ,
24
- `\r` ,
25
- `\/` ,
26
- `\b` ,
27
- `\f` ,
66
+ for _ , test := range singleUnicodeEscapeTests {
67
+ r , ok := decodeSingleUnicodeEscape ([]byte (test .in ))
68
+ isErr := ! ok
69
+
70
+ if isErr != test .isErr {
71
+ t .Errorf ("decodeSingleUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t" , test .in , test .isErr , isErr )
72
+ } else if isErr {
73
+ continue
74
+ } else if r != test .out {
75
+ t .Errorf ("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)" , test .in , test .out , test .out , r , r )
76
+ }
28
77
}
78
+ }
79
+
80
+ func TestDecodeUnicodeEscape (t * testing.T ) {
81
+ for _ , test := range multiUnicodeEscapeTests {
82
+ r , len := decodeUnicodeEscape ([]byte (test .in ))
83
+ isErr := (len == - 1 )
29
84
30
- runeValues := []struct {
31
- r rune
32
- ok bool
33
- }{
34
- {'"' , true },
35
- {'\\' , true },
36
- {'\n' , true },
37
- {'\t' , true },
38
- {'/' , true },
39
- {'\b' , true },
40
- {'\f' , true },
85
+ if isErr != test .isErr {
86
+ t .Errorf ("decodeUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t" , test .in , test .isErr , isErr )
87
+ } else if isErr {
88
+ continue
89
+ } else if len != test .len {
90
+ t .Errorf ("decodeUnicodeEscape(%s) returned length mismatch: expected %d, obtained %d" , test .in , test .len , len )
91
+ } else if r != test .out {
92
+ t .Errorf ("decodeUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)" , test .in , test .out , test .out , r , r )
93
+ }
41
94
}
95
+ }
96
+
97
// unescapeTest is one table-driven case for unescape.
type unescapeTest struct {
	in       string // escaped input
	out      string // expected unescaped output; only compared when isErr is false
	canAlloc bool   // whether a fresh allocation is expected when the supplied buffer is too small
	isErr    bool   // true when unescape is expected to return an error
}
42
103
43
- for i , esc := range escapeSequences {
44
- expected := runeValues [i ]
45
- if r , ok := decodeSingleUnicodeEscape ([]byte (esc )); ok != expected .ok {
46
- t .Errorf ("decodeSingleUnicodeEscape(%s) returned 'ok' mismatch: expected %t, obtained %t" , esc , expected .ok , ok )
47
- } else if r != expected .r {
48
- t .Errorf ("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)" , esc , expected .r , expected .r , r , r )
104
+ var unescapeTests = []unescapeTest {
105
+ {in : `` , out : `` , canAlloc : false },
106
+ {in : `a` , out : `a` , canAlloc : false },
107
+ {in : `abcde` , out : `abcde` , canAlloc : false },
108
+
109
+ {in : `ab\\de` , out : `ab\de` , canAlloc : true },
110
+ {in : `ab\"de` , out : `ab"de` , canAlloc : true },
111
+ {in : `ab \u00B0 de` , out : `ab ° de` , canAlloc : true },
112
+ {in : `ab \uD83D\uDE03 de` , out : "ab \U0001F603 de" , canAlloc : true },
113
+ {in : `\u0000\u0000\u0000\u0000\u0000` , out : "\u0000 \u0000 \u0000 \u0000 \u0000 " , canAlloc : true },
114
+ {in : `\u0000 \u0000 \u0000 \u0000 \u0000` , out : "\u0000 \u0000 \u0000 \u0000 \u0000 " , canAlloc : true },
115
+ {in : ` \u0000 \u0000 \u0000 \u0000 \u0000 ` , out : " \u0000 \u0000 \u0000 \u0000 \u0000 " , canAlloc : true },
116
+
117
+ {in : `\uD800` , isErr : true },
118
+ {in : `\uFFFF` , isErr : true },
119
+ {in : `abcde\` , isErr : true },
120
+ {in : `abcde\x` , isErr : true },
121
+ {in : `abcde\u` , isErr : true },
122
+ {in : `abcde\u1` , isErr : true },
123
+ {in : `abcde\u12` , isErr : true },
124
+ {in : `abcde\u123` , isErr : true },
125
+ {in : `abcde\uD800` , isErr : true },
126
+ {in : `ab\uD800de` , isErr : true },
127
+ {in : `\uD800abcde` , isErr : true },
128
+ }
129
+
130
// isSameMemory reports whether a and b start at the same address, i.e. they
// are views of the same backing array beginning at the same element (their
// lengths and capacities may differ). Slices that share a backing array but
// start at different offsets are reported as different.
//
// If either slice has zero capacity there is no element to compare, so the
// result is true only when both have zero capacity.
func isSameMemory(a, b []byte) bool {
	if cap(a) == 0 || cap(b) == 0 {
		return cap(a) == cap(b)
	}
	// Reslice to length 1 so element 0 is addressable even for zero-length
	// slices, then compare the element addresses directly. This never writes
	// to the caller's memory.
	return &a[:1][0] == &b[:1][0]
}
145
+
146
+ func TestUnescape (t * testing.T ) {
147
+
148
+ for _ , test := range unescapeTests {
149
+ type bufferTestCase struct {
150
+ buf []byte
151
+ isTooSmall bool
152
+ }
153
+
154
+ var bufs []bufferTestCase
155
+
156
+ if len (test .in ) == 0 {
157
+ // If the input string is length 0, only a buffer of size 0 is a meaningful test
158
+ bufs = []bufferTestCase {{nil , false }}
159
+ } else {
160
+ // For non-empty input strings, we can try several buffer sizes (0, len-1, len)
161
+ bufs = []bufferTestCase {
162
+ {nil , true },
163
+ {make ([]byte , 0 , len (test .in )- 1 ), true },
164
+ {make ([]byte , 0 , len (test .in )), false },
165
+ }
166
+ }
167
+
168
+ for _ , buftest := range bufs {
169
+ in := []byte (test .in )
170
+ buf := buftest .buf
171
+
172
+ out , err := unescape (in , buf )
173
+ isErr := (err != nil )
174
+ isAlloc := ! isSameMemory (out , in ) && ! isSameMemory (out , buf )
175
+
176
+ if isErr != test .isErr {
177
+ t .Errorf ("unescape(`%s`, bufsize=%d) returned isErr mismatch: expected %t, obtained %t" , test .in , cap (buf ), test .isErr , isErr )
178
+ break
179
+ } else if isErr {
180
+ continue
181
+ } else if ! bytes .Equal (out , []byte (test .out )) {
182
+ t .Errorf ("unescape(`%s`, bufsize=%d) returned unescaped mismatch: expected `%s` (%v, len %d), obtained `%s` (%v, len %d)" , test .in , cap (buf ), test .out , []byte (test .out ), len (test .out ), string (out ), out , len (out ))
183
+ break
184
+ } else if isAlloc != (test .canAlloc && buftest .isTooSmall ) {
185
+ t .Errorf ("unescape(`%s`, bufsize=%d) returned isAlloc mismatch: expected %t, obtained %t" , test .in , cap (buf ), buftest .isTooSmall , isAlloc )
186
+ break
187
+ }
49
188
}
50
189
}
51
190
}
191
+
192
+ //
193
+ //escapeSequences := []string{
194
+ //`\"`,
195
+ //`\\`,
196
+ //`\n`,
197
+ //`\t`,
198
+ //`\r`,
199
+ //`\/`,
200
+ //`\b`,
201
+ //`\f`,
202
+ //}
203
+ //
204
+ //runeValues := []struct {
205
+ //r rune
206
+ //ok bool
207
+ //}{
208
+ //{'"', true},
209
+ //{'\\', true},
210
+ //{'\n', true},
211
+ //{'\t', true},
212
+ //{'/', true},
213
+ //{'\b', true},
214
+ //{'\f', true},
215
+ //}
0 commit comments