1
1
use bumpalo:: Bump ;
2
2
use hashbrown:: HashSet ;
3
+ use std:: alloc:: Layout ;
4
+ use std:: fmt;
5
+ use std:: ptr;
3
6
use std:: sync:: Mutex ;
4
7
5
8
pub trait InternString {
6
- fn to_interned ( s : & ' static str ) -> Self ;
9
+ unsafe fn to_interned ( s : ArenaStr ) -> Self ;
7
10
}
8
11
9
12
#[ macro_export]
10
13
macro_rules! intern {
11
14
( pub struct $for_ty: ident) => {
12
- #[ derive( Serialize , Debug , PartialOrd , Ord , Copy , Clone ) ]
13
- pub struct $for_ty( & ' static str ) ;
15
+ #[ derive( Serialize , Debug , Copy , Clone ) ]
16
+ pub struct $for_ty( crate :: intern :: ArenaStr ) ;
14
17
15
18
// Equality is decided by the address of the interned record, not by
// comparing string bytes.
impl std::cmp::PartialEq for $for_ty {
    fn eq(&self, other: &Self) -> bool {
        self.0.hash_ptr() == other.0.hash_ptr()
    }
}
20
23
21
24
// Address comparison is reflexive, symmetric and transitive, so the
// `PartialEq` impl above is a full equivalence relation.
impl std::cmp::Eq for $for_ty {}
22
25
26
+ impl std:: cmp:: PartialOrd for $for_ty {
27
+ fn partial_cmp( & self , other: & Self ) -> Option <std:: cmp:: Ordering > {
28
+ Some ( self . cmp( other) )
29
+ }
30
+ }
31
+
32
+ impl std:: cmp:: Ord for $for_ty {
33
+ fn cmp( & self , other: & Self ) -> std:: cmp:: Ordering {
34
+ self . 0 . as_str( ) . cmp( other. 0 . as_str( ) )
35
+ }
36
+ }
37
+
23
38
// Hashes the record address only, matching the address-based `PartialEq`.
impl std::hash::Hash for $for_ty {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        state.write_usize(self.0.hash_ptr());
    }
}
28
43
@@ -55,13 +70,13 @@ macro_rules! intern {
55
70
56
71
// Comparison against an arbitrary `str` has to look at the contents, since
// `other` is not an interned handle.
impl std::cmp::PartialEq<str> for $for_ty {
    fn eq(&self, other: &str) -> bool {
        self.0.as_str() == other
    }
}
61
76
62
77
impl std:: fmt:: Display for $for_ty {
63
78
fn fmt( & self , f: & mut std:: fmt:: Formatter <' _>) -> std:: fmt:: Result {
64
- write!( f, "{}" , self . 0 )
79
+ write!( f, "{}" , self . 0 . as_str ( ) )
65
80
}
66
81
}
67
82
@@ -74,28 +89,84 @@ macro_rules! intern {
74
89
// Lets the symbol be used wherever a `&str` is expected.
impl std::ops::Deref for $for_ty {
    type Target = str;

    fn deref(&self) -> &str {
        self.0.as_str()
    }
}
80
95
81
96
impl crate :: intern:: InternString for $for_ty {
82
- fn to_interned( v: & ' static str ) -> $for_ty {
97
+ unsafe fn to_interned( v: crate :: intern :: ArenaStr ) -> $for_ty {
83
98
$for_ty( v)
84
99
}
85
100
}
86
101
} ;
87
102
}
88
103
89
104
lazy_static:: lazy_static! {
90
- static ref INTERNED : Mutex <( HashSet <& ' static str >, Bump ) >
105
+ static ref INTERNED : Mutex <( HashSet <ArenaStr >, Bump ) >
91
106
= Mutex :: new( ( HashSet :: new( ) , Bump :: new( ) ) ) ;
92
107
}
93
108
94
109
pub fn intern < T : InternString > ( value : & str ) -> T {
95
110
let mut guard = INTERNED . lock ( ) . unwrap ( ) ;
96
111
97
112
let ( ref mut set, ref arena) = & mut * guard;
98
- T :: to_interned ( set. get_or_insert_with ( value, |_| -> & ' static str {
99
- unsafe { std:: mem:: transmute :: < & str , & ' static str > ( arena. alloc_str ( value) ) }
100
- } ) )
113
+ unsafe {
114
+ T :: to_interned ( * set. get_or_insert_with ( value, |_| -> ArenaStr {
115
+ let ptr = arena. alloc_layout (
116
+ Layout :: from_size_align ( std:: mem:: size_of :: < usize > ( ) + value. len ( ) , 1 ) . unwrap ( ) ,
117
+ ) ;
118
+ let start_at = ptr. as_ptr ( ) ;
119
+ ptr:: write ( start_at as * mut _ , value. len ( ) . to_ne_bytes ( ) ) ;
120
+ let bytes = start_at. add ( std:: mem:: size_of :: < usize > ( ) ) ;
121
+ ptr:: copy_nonoverlapping ( value. as_ptr ( ) , bytes, value. len ( ) ) ;
122
+
123
+ ArenaStr ( start_at as * const u8 )
124
+ } ) )
125
+ }
126
+ }
127
+
128
+ #[ derive( serde:: Serialize , Copy , Clone , PartialEq , Eq ) ]
129
+ #[ serde( into = "&'static str" ) ]
130
+ pub struct ArenaStr ( * const u8 ) ;
131
+
132
+ impl Into < & ' static str > for ArenaStr {
133
+ fn into ( self ) -> & ' static str {
134
+ self . as_str ( )
135
+ }
136
+ }
137
+
138
+ unsafe impl Send for ArenaStr { }
139
+ unsafe impl Sync for ArenaStr { }
140
+
141
+ impl ArenaStr {
142
+ pub fn as_str ( self ) -> & ' static str {
143
+ unsafe {
144
+ let mut ptr = self . 0 ;
145
+ let length = usize:: from_ne_bytes ( ptr:: read ( ptr as * const _ ) ) ;
146
+ ptr = ptr. add ( std:: mem:: size_of :: < usize > ( ) ) ;
147
+ std:: str:: from_utf8_unchecked ( std:: slice:: from_raw_parts ( ptr, length) )
148
+ }
149
+ }
150
+
151
+ pub fn hash_ptr ( self ) -> usize {
152
+ self . 0 as usize
153
+ }
154
+ }
155
+
156
+ impl fmt:: Debug for ArenaStr {
157
+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
158
+ fmt:: Debug :: fmt ( self . as_str ( ) , f)
159
+ }
160
+ }
161
+
162
+ impl std:: hash:: Hash for ArenaStr {
163
+ fn hash < H : std:: hash:: Hasher > ( & self , state : & mut H ) {
164
+ self . as_str ( ) . hash ( state) ;
165
+ }
166
+ }
167
+
168
+ impl std:: borrow:: Borrow < str > for ArenaStr {
169
+ fn borrow ( & self ) -> & str {
170
+ self . as_str ( )
171
+ }
101
172
}
0 commit comments