Skip to content

Commit 623d9b4

Browse files
authored
Merge pull request #1273 from conradludgate/optimise-string-escaping
string serialization escaping optimisations
2 parents (cd55b5a + de70b7d), commit 623d9b4

File tree

1 file changed

+47
-41
lines changed

1 file changed

+47
-41
lines changed

src/ser.rs

Lines changed: 47 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1534,23 +1534,6 @@ pub enum CharEscape {
15341534
AsciiControl(u8),
15351535
}
15361536

1537-
impl CharEscape {
1538-
#[inline]
1539-
fn from_escape_table(escape: u8, byte: u8) -> CharEscape {
1540-
match escape {
1541-
self::BB => CharEscape::Backspace,
1542-
self::TT => CharEscape::Tab,
1543-
self::NN => CharEscape::LineFeed,
1544-
self::FF => CharEscape::FormFeed,
1545-
self::RR => CharEscape::CarriageReturn,
1546-
self::QU => CharEscape::Quote,
1547-
self::BS => CharEscape::ReverseSolidus,
1548-
self::UU => CharEscape::AsciiControl(byte),
1549-
_ => unreachable!(),
1550-
}
1551-
}
1552-
}
1553-
15541537
/// This trait abstracts away serializing the JSON control characters, which allows the user to
15551538
/// optionally pretty print the JSON output.
15561539
pub trait Formatter {
@@ -1784,30 +1767,33 @@ pub trait Formatter {
17841767
{
17851768
use self::CharEscape::*;
17861769

1787-
let s = match char_escape {
1788-
Quote => b"\\\"",
1789-
ReverseSolidus => b"\\\\",
1790-
Solidus => b"\\/",
1791-
Backspace => b"\\b",
1792-
FormFeed => b"\\f",
1793-
LineFeed => b"\\n",
1794-
CarriageReturn => b"\\r",
1795-
Tab => b"\\t",
1770+
let escape_char = match char_escape {
1771+
Quote => b'"',
1772+
ReverseSolidus => b'\\',
1773+
Solidus => b'/',
1774+
Backspace => b'b',
1775+
FormFeed => b'f',
1776+
LineFeed => b'n',
1777+
CarriageReturn => b'r',
1778+
Tab => b't',
1779+
AsciiControl(_) => b'u',
1780+
};
1781+
1782+
match char_escape {
17961783
AsciiControl(byte) => {
17971784
static HEX_DIGITS: [u8; 16] = *b"0123456789abcdef";
17981785
let bytes = &[
17991786
b'\\',
1800-
b'u',
1787+
escape_char,
18011788
b'0',
18021789
b'0',
18031790
HEX_DIGITS[(byte >> 4) as usize],
18041791
HEX_DIGITS[(byte & 0xF) as usize],
18051792
];
1806-
return writer.write_all(bytes);
1793+
writer.write_all(bytes)
18071794
}
1808-
};
1809-
1810-
writer.write_all(s)
1795+
_ => writer.write_all(&[b'\\', escape_char]),
1796+
}
18111797
}
18121798

18131799
/// Writes the representation of a byte array. Formatters can choose whether
@@ -2097,31 +2083,51 @@ where
20972083
W: ?Sized + io::Write,
20982084
F: ?Sized + Formatter,
20992085
{
2100-
let bytes = value.as_bytes();
2086+
let mut bytes = value.as_bytes();
21012087

2102-
let mut start = 0;
2088+
let mut i = 0;
2089+
while i < bytes.len() {
2090+
let (string_run, rest) = bytes.split_at(i);
2091+
let (&byte, rest) = rest.split_first().unwrap();
21032092

2104-
for (i, &byte) in bytes.iter().enumerate() {
21052093
let escape = ESCAPE[byte as usize];
2094+
2095+
i += 1;
21062096
if escape == 0 {
21072097
continue;
21082098
}
21092099

2110-
if start < i {
2111-
tri!(formatter.write_string_fragment(writer, &value[start..i]));
2100+
bytes = rest;
2101+
i = 0;
2102+
2103+
// safety: string_run is a valid utf8 string, since we only split on ascii sequences
2104+
let string_run = unsafe { core::str::from_utf8_unchecked(string_run) };
2105+
if !string_run.is_empty() {
2106+
tri!(formatter.write_string_fragment(writer, string_run));
21122107
}
21132108

2114-
let char_escape = CharEscape::from_escape_table(escape, byte);
2109+
let char_escape = match escape {
2110+
self::BB => CharEscape::Backspace,
2111+
self::TT => CharEscape::Tab,
2112+
self::NN => CharEscape::LineFeed,
2113+
self::FF => CharEscape::FormFeed,
2114+
self::RR => CharEscape::CarriageReturn,
2115+
self::QU => CharEscape::Quote,
2116+
self::BS => CharEscape::ReverseSolidus,
2117+
self::UU => CharEscape::AsciiControl(byte),
2118+
// safety: the escape table does not contain any other type of character.
2119+
_ => unsafe { core::hint::unreachable_unchecked() },
2120+
};
21152121
tri!(formatter.write_char_escape(writer, char_escape));
2116-
2117-
start = i + 1;
21182122
}
21192123

2120-
if start == bytes.len() {
2124+
// safety: bytes is a valid utf8 string, since we only split on ascii sequences
2125+
let string_run = unsafe { core::str::from_utf8_unchecked(bytes) };
2126+
if string_run.is_empty() {
21212127
return Ok(());
21222128
}
21232129

2124-
formatter.write_string_fragment(writer, &value[start..])
2130+
formatter.write_string_fragment(writer, string_run)
21252131
}
21262132

21272133
const BB: u8 = b'b'; // \x08

0 commit comments

Comments
 (0)