Skip to content

Commit f10a4ca

Browse files
Change interned strings to be thin pointers
This moves the length of the associated string into the arena allocation.
1 parent 91c55e7 commit f10a4ca

File tree

1 file changed

+84
-13
lines changed

1 file changed

+84
-13
lines changed

collector/src/intern.rs

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,43 @@
11
use bumpalo::Bump;
22
use hashbrown::HashSet;
3+
use std::alloc::Layout;
4+
use std::fmt;
5+
use std::ptr;
36
use std::sync::Mutex;
47

58
pub trait InternString {
6-
fn to_interned(s: &'static str) -> Self;
9+
unsafe fn to_interned(s: ArenaStr) -> Self;
710
}
811

912
#[macro_export]
1013
macro_rules! intern {
1114
(pub struct $for_ty:ident) => {
12-
#[derive(Serialize, Debug, PartialOrd, Ord, Copy, Clone)]
13-
pub struct $for_ty(&'static str);
15+
#[derive(Serialize, Debug, Copy, Clone)]
16+
pub struct $for_ty(crate::intern::ArenaStr);
1417

1518
impl std::cmp::PartialEq for $for_ty {
1619
fn eq(&self, other: &Self) -> bool {
17-
std::ptr::eq(self.0.as_ptr(), other.0.as_ptr())
20+
self.0.hash_ptr() == other.0.hash_ptr()
1821
}
1922
}
2023

2124
impl std::cmp::Eq for $for_ty {}
2225

26+
impl std::cmp::PartialOrd for $for_ty {
27+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
28+
Some(self.cmp(other))
29+
}
30+
}
31+
32+
impl std::cmp::Ord for $for_ty {
33+
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
34+
self.0.as_str().cmp(other.0.as_str())
35+
}
36+
}
37+
2338
impl std::hash::Hash for $for_ty {
2439
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
25-
state.write_usize(self.0.as_ptr() as usize);
40+
state.write_usize(self.0.hash_ptr());
2641
}
2742
}
2843

@@ -55,13 +70,13 @@ macro_rules! intern {
5570

5671
impl std::cmp::PartialEq<str> for $for_ty {
5772
fn eq(&self, other: &str) -> bool {
58-
self.0 == other
73+
self.0.as_str() == other
5974
}
6075
}
6176

6277
impl std::fmt::Display for $for_ty {
6378
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64-
write!(f, "{}", self.0)
79+
write!(f, "{}", self.0.as_str())
6580
}
6681
}
6782

@@ -74,28 +89,84 @@ macro_rules! intern {
7489
impl std::ops::Deref for $for_ty {
7590
type Target = str;
7691
fn deref(&self) -> &str {
77-
self.0
92+
self.0.as_str()
7893
}
7994
}
8095

8196
impl crate::intern::InternString for $for_ty {
82-
fn to_interned(v: &'static str) -> $for_ty {
97+
unsafe fn to_interned(v: crate::intern::ArenaStr) -> $for_ty {
8398
$for_ty(v)
8499
}
85100
}
86101
};
87102
}
88103

89104
lazy_static::lazy_static! {
    /// Global interner state guarded by a single mutex: the set of handles
    /// that have already been interned, and the bump arena that owns the
    /// bytes those handles point at.
    static ref INTERNED: Mutex<(HashSet<ArenaStr>, Bump)> =
        Mutex::new((HashSet::new(), Bump::new()));
}
93108

94109
pub fn intern<T: InternString>(value: &str) -> T {
95110
let mut guard = INTERNED.lock().unwrap();
96111

97112
let (ref mut set, ref arena) = &mut *guard;
98-
T::to_interned(set.get_or_insert_with(value, |_| -> &'static str {
99-
unsafe { std::mem::transmute::<&str, &'static str>(arena.alloc_str(value)) }
100-
}))
113+
unsafe {
114+
T::to_interned(*set.get_or_insert_with(value, |_| -> ArenaStr {
115+
let ptr = arena.alloc_layout(
116+
Layout::from_size_align(std::mem::size_of::<usize>() + value.len(), 1).unwrap(),
117+
);
118+
let start_at = ptr.as_ptr();
119+
ptr::write(start_at as *mut _, value.len().to_ne_bytes());
120+
let bytes = start_at.add(std::mem::size_of::<usize>());
121+
ptr::copy_nonoverlapping(value.as_ptr(), bytes, value.len());
122+
123+
ArenaStr(start_at as *const u8)
124+
}))
125+
}
126+
}
127+
128+
#[derive(serde::Serialize, Copy, Clone, PartialEq, Eq)]
129+
#[serde(into = "&'static str")]
130+
pub struct ArenaStr(*const u8);
131+
132+
/// Converts a handle into the string slice it refers to.
///
/// Implemented as `From` rather than a hand-written `Into`: the standard
/// blanket impl still provides `ArenaStr: Into<&'static str>`, which is what
/// the `#[serde(into = "&'static str")]` attribute on `ArenaStr` relies on.
impl From<ArenaStr> for &'static str {
    fn from(handle: ArenaStr) -> &'static str {
        handle.as_str()
    }
}
137+
138+
unsafe impl Send for ArenaStr {}
139+
unsafe impl Sync for ArenaStr {}
140+
141+
impl ArenaStr {
142+
pub fn as_str(self) -> &'static str {
143+
unsafe {
144+
let mut ptr = self.0;
145+
let length = usize::from_ne_bytes(ptr::read(ptr as *const _));
146+
ptr = ptr.add(std::mem::size_of::<usize>());
147+
std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr, length))
148+
}
149+
}
150+
151+
pub fn hash_ptr(self) -> usize {
152+
self.0 as usize
153+
}
154+
}
155+
156+
impl fmt::Debug for ArenaStr {
157+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158+
fmt::Debug::fmt(self.as_str(), f)
159+
}
160+
}
161+
162+
impl std::hash::Hash for ArenaStr {
163+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
164+
self.as_str().hash(state);
165+
}
166+
}
167+
168+
impl std::borrow::Borrow<str> for ArenaStr {
169+
fn borrow(&self) -> &str {
170+
self.as_str()
171+
}
101172
}

0 commit comments

Comments
 (0)