Skip to content

Commit bba8a10

Browse files
authored
Fix JSON parsing of escaped strings (#7545)
To find the end of a string, we must handle escaping more carefully. We previously assumed that any \" was an escaped double-quote, but it might instead be the tail of \\" - that is, the backslash just before the double-quote is itself escaped by an earlier backslash, so the double-quote is not escaped and actually terminates the string.
1 parent 2f90ad1 commit bba8a10

File tree

3 files changed

+26
-8
lines changed

3 files changed

+26
-8
lines changed

scripts/test/fuzzing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@
128128
'type-refining-gufa-exact.wast',
129129
# TODO: fuzzer support for custom descriptors
130130
'custom-descriptors.wast',
131+
# TODO: fix split_wast() on tricky escaping situations like a string ending
132+
# in \\" (the " is not escaped - there is an escaped \ before it)
133+
'string-lifting-section.wast',
131134
]
132135

133136

src/support/json.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -282,14 +282,20 @@ struct Value {
282282
skip();
283283
if (*curr == '"') {
284284
// String
285-
// Start |close| at the opening ", and in the loop below we will always
285+
// Start |close| after the opening ", and in the loop below we will always
286286
// begin looking at the first character after.
287-
char* close = curr;
288-
// Skip escaped "
289-
do {
290-
close = strchr(close + 1, '"');
291-
} while (*(close - 1) == '\\');
292-
THROW_IF(!close, "malformed JSON string");
287+
char* close = curr + 1;
288+
// Skip escaped ", which appears as \". We need to be careful though, as
289+
// \" might also be \\" which would be an escaped \ and an *un*escaped ".
290+
while (*close && *close != '"') {
291+
if (*close == '\\') {
292+
// Skip the \ and the character after it, which it escapes.
293+
close++;
294+
THROW_IF(!*close, "unexpected end of JSON string (quoting)");
295+
}
296+
close++;
297+
}
298+
THROW_IF(!close, "unexpected end of JSON string");
293299
*close = 0; // end this string, and reuse it straight from the input
294300
char* raw = curr + 1;
295301
if (stringEncoding == ASCII) {

test/lit/passes/string-lifting-section.wast

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
;; Lower first to generate the string.consts custom section, then lift it back.
44

5-
;; RUN: foreach %s %t wasm-opt -all --string-lowering --string-lifting -S -o - | filecheck %s
5+
;; RUN: wasm-opt %s -all --string-lowering --string-lifting -S -o - | filecheck %s
66

77
(module
88
;; CHECK: (type $0 (array (mut i16)))
@@ -37,6 +37,8 @@
3737

3838
;; CHECK: (import "string.const" "5" (global $"string.const_\"unpaired low surrogate \\ed\\bd\\88 \"" (ref extern)))
3939

40+
;; CHECK: (import "string.const" "6" (global $"string.const_\"z\\\\\"" (ref extern)))
41+
4042
;; CHECK: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $3) (param (ref null $0) i32 i32) (result (ref extern))))
4143

4244
;; CHECK: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $4) (param i32) (result (ref extern))))
@@ -94,6 +96,9 @@
9496
;; CHECK-NEXT: (drop
9597
;; CHECK-NEXT: (string.const "unpaired low surrogate \ed\bd\88 ")
9698
;; CHECK-NEXT: )
99+
;; CHECK-NEXT: (drop
100+
;; CHECK-NEXT: (string.const "z\\")
101+
;; CHECK-NEXT: )
97102
;; CHECK-NEXT: )
98103
(func $tricky-consts
99104
;; These tricky strings should remain exactly the same after lowering and
@@ -110,6 +115,10 @@
110115
(drop
111116
(string.const "unpaired low surrogate \ED\BD\88 ")
112117
)
118+
;; A string with \", but the " is not escaped, as the \ is part of \\.
119+
(drop
120+
(string.const "z\\")
121+
)
113122
)
114123
)
115124

0 commit comments

Comments
 (0)