Skip to content

Commit d398961

Browse files
Avoid allocating strings when we can just reference the data in the string table directly.
1 parent 76cf3ea commit d398961

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

analyzeme/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ license = "MIT OR Apache-2.0"
77

88
[dependencies]
99
byteorder = "1.2.7"
10+
memchr = "2"
1011
measureme = { path = "../measureme" }
1112
rustc-hash = "1.0.1"
1213
serde = { version = "1.0", features = [ "derive" ] }

analyzeme/src/stringtable.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use measureme::{Addr, StringId};
1010
use rustc_hash::FxHashMap;
1111
use std::borrow::Cow;
1212
use std::error::Error;
13+
use memchr::memchr;
1314

1415
// See module-level documentation for more information on the encoding.
1516
const UTF8_CONTINUATION_MASK: u8 = 0b1100_0000;
@@ -30,9 +31,28 @@ pub struct StringRef<'st> {
3031

3132
impl<'st> StringRef<'st> {
3233
pub fn to_string(&self) -> Cow<'st, str> {
33-
let mut output = String::new();
34-
self.write_to_string(&mut output);
35-
Cow::from(output)
34+
35+
// Try to avoid the allocation, which we can do if this is a
36+
// [value, 0xFF] entry.
37+
let addr = self.table.index[&self.id];
38+
let pos = addr.as_usize();
39+
let slice_to_search = &self.table.string_data[pos..];
40+
41+
// Find the first 0xFF byte, which is either the sequence
42+
// terminator or a byte in the middle of a string id. Use `memchr`, which
43+
// is super fast.
44+
let terminator_pos = memchr(TERMINATOR, slice_to_search).unwrap();
45+
46+
// Decode the bytes until the terminator. If there is a string id in
47+
// between somewhere this will fail, and we fall back to the allocating
48+
// path.
49+
if let Ok(s) = std::str::from_utf8(&slice_to_search[..terminator_pos]) {
50+
Cow::from(s)
51+
} else {
52+
let mut output = String::new();
53+
self.write_to_string(&mut output);
54+
Cow::from(output)
55+
}
3656
}
3757

3858
pub fn write_to_string(&self, output: &mut String) {

0 commit comments

Comments
 (0)