Skip to content

Commit 6ee950d

Browse files
committed
proc_macro: stop using a remote object handle for Ident
Doing this for all Unicode identifiers would require a dependency on `unicode-normalization` and `rustc_lexer`, which is currently not possible for `proc_macro` due to it being built concurrently with `std` and `core`. Instead, ASCII identifiers are validated locally, and an RPC message is used to validate Unicode identifiers when needed. String values are interned on both the server and client when deserializing, to avoid unnecessary copies and keep Ident cheap to copy and move. This appears to be important for performance. The client-side interner is based roughly on the one from rustc_span, and uses an arena inspired by rustc_arena. RPC messages passing symbols always include the full value. This could potentially be optimized in the future if it is revealed to be a performance bottleneck. Despite now having a relevant implementation of Display for Ident, ToString is still specialized, as it is a hot path for this object. The symbol infrastructure will also be used for literals in the next part.
1 parent b8b41e2 commit 6ee950d

File tree

6 files changed

+395
-29
lines changed

6 files changed

+395
-29
lines changed

proc_macro/src/bridge/arena.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
2+
//!
3+
//! This is unfortunately a minimal re-implementation rather than a dependency
4+
//! as it is difficult to depend on crates from within `proc_macro`, due to it
5+
//! being built at the same time as `std`.
6+
7+
use std::cell::{Cell, RefCell};
8+
use std::cmp;
9+
use std::mem::MaybeUninit;
10+
use std::ops::Range;
11+
use std::ptr;
12+
use std::slice;
13+
use std::str;
14+
15+
// The arenas start with PAGE-sized chunks, and then each new chunk is twice as
// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon
// we stop growing. This scales well, from arenas that are barely used up to
// arenas that are used for 100s of MiBs. Note also that the chosen sizes match
// the usual sizes of pages and huge pages on Linux.
const PAGE: usize = 4096;
const HUGE_PAGE: usize = 2 * 1024 * 1024;

/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
///
/// This is unfortunately a minimal re-implementation rather than a dependency
/// as it is difficult to depend on crates from within `proc_macro`, due to it
/// being built at the same time as `std`.
///
/// This arena doesn't have support for allocating anything other than byte
/// slices, as that is all that is necessary.
///
/// Space is handed out from the *end* of the current chunk towards its start.
/// Chunks are only ever added, never removed, so every slice handed out stays
/// backed by memory for as long as the arena itself is alive.
pub(crate) struct Arena {
    /// Lowest address of the current chunk (null until the first `grow`).
    start: Cell<*mut MaybeUninit<u8>>,
    /// Lower bound of the bytes already handed out from the current chunk;
    /// the free region is `start..end`. Null until the first `grow`.
    end: Cell<*mut MaybeUninit<u8>>,
    /// Every chunk allocated so far; the last one is the current chunk.
    chunks: RefCell<Vec<Box<[MaybeUninit<u8>]>>>,
}

impl Arena {
    /// Creates an empty arena. No memory is reserved until the first
    /// non-empty allocation.
    pub(crate) fn new() -> Self {
        Arena {
            start: Cell::new(ptr::null_mut()),
            end: Cell::new(ptr::null_mut()),
            chunks: RefCell::new(Vec::new()),
        }
    }

    /// Add a new chunk with at least `additional` free bytes.
    ///
    /// Whatever was still free in the previous chunk is abandoned: `start`
    /// and `end` are re-pointed at the new chunk.
    #[inline(never)]
    #[cold]
    fn grow(&self, additional: usize) {
        let mut chunks = self.chunks.borrow_mut();

        // The first chunk is PAGE bytes. Every later chunk is double the
        // previous chunk's length, with the doubled size capped at HUGE_PAGE.
        let doubled = match chunks.last() {
            Some(last_chunk) => last_chunk.len().min(HUGE_PAGE / 2) * 2,
            None => PAGE,
        };
        // Also ensure that this chunk can fit `additional`.
        let new_cap = cmp::max(additional, doubled);

        let mut chunk = Box::new_uninit_slice(new_cap);
        let Range { start, end } = chunk.as_mut_ptr_range();
        self.start.set(start);
        self.end.set(end);
        chunks.push(chunk);
    }

    /// Allocates a byte slice with specified size from the current memory
    /// chunk. Returns `None` if there is no free space left to satisfy the
    /// request.
    fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit<u8>]> {
        let start = self.start.get().addr();
        let old_end = self.end.get();

        // `checked_sub` fails when `bytes` exceeds the end address itself,
        // which covers every non-empty request made before the first `grow`
        // (both pointers are still null, i.e. address 0).
        let new_end_addr = old_end.addr().checked_sub(bytes)?;
        if new_end_addr < start {
            return None;
        }

        let new_end = old_end.with_addr(new_end_addr);
        self.end.set(new_end);
        // SAFETY: `new_end..old_end` lies inside the current chunk (checked
        // against `start` above), and `end` has just been moved below it, so
        // these `bytes` bytes are reserved and will not be handed out again.
        Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) })
    }

    /// Allocates `bytes` uninitialized bytes, adding a new chunk whenever the
    /// current one cannot satisfy the request. A zero-sized request returns
    /// an empty slice without touching the arena.
    fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit<u8>] {
        if bytes == 0 {
            return &mut [];
        }

        loop {
            if let Some(a) = self.alloc_raw_without_grow(bytes) {
                break a;
            }
            // No free space left. Allocate a new chunk to satisfy the request.
            // On failure the grow will panic or abort.
            self.grow(bytes);
        }
    }

    /// Copies `string` into the arena and returns the arena-owned copy, which
    /// borrows from the arena rather than from `string`.
    pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str {
        let len = string.len();
        let dst = self.alloc_raw(len).as_mut_ptr().cast::<u8>();

        // SAFETY: `dst` points at `len` freshly reserved bytes that nothing
        // else refers to, so it cannot overlap `string`; for `len == 0` both
        // pointers are non-null and aligned, which is all a zero-length copy
        // requires. After the copy the region holds exactly the bytes of a
        // `&str`, so it is initialized and valid UTF-8.
        unsafe {
            ptr::copy_nonoverlapping(string.as_ptr(), dst, len);
            str::from_utf8_unchecked_mut(slice::from_raw_parts_mut(dst, len))
        }
    }
}

proc_macro/src/bridge/client.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ define_handles! {
181181
Diagnostic,
182182

183183
'interned:
184-
Ident,
185184
Span,
186185
}
187186

@@ -242,6 +241,8 @@ impl fmt::Debug for Span {
242241
}
243242
}
244243

244+
pub(crate) use super::symbol::Symbol;
245+
245246
macro_rules! define_client_side {
246247
($($name:ident {
247248
$(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)*
@@ -405,6 +406,9 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
405406
panic::catch_unwind(panic::AssertUnwindSafe(|| {
406407
maybe_install_panic_hook(force_show_panics);
407408

409+
// Make sure the symbol store is empty before decoding inputs.
410+
Symbol::invalidate_all();
411+
408412
let reader = &mut &buf[..];
409413
let (globals, input) = <(ExpnGlobals<Span>, A)>::decode(reader, &mut ());
410414

@@ -438,6 +442,10 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
438442
buf.clear();
439443
Err::<(), _>(e).encode(&mut buf, &mut ());
440444
});
445+
446+
// Now that a response has been serialized, invalidate all symbols
447+
// registered with the interner.
448+
Symbol::invalidate_all();
441449
buf
442450
}
443451

proc_macro/src/bridge/mod.rs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,24 +65,19 @@ macro_rules! with_api {
6565
fn from_str(src: &str) -> $S::TokenStream;
6666
fn to_string($self: &$S::TokenStream) -> String;
6767
fn from_token_tree(
68-
tree: TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>,
68+
tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>,
6969
) -> $S::TokenStream;
7070
fn concat_trees(
7171
base: Option<$S::TokenStream>,
72-
trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>,
72+
trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>,
7373
) -> $S::TokenStream;
7474
fn concat_streams(
7575
base: Option<$S::TokenStream>,
7676
streams: Vec<$S::TokenStream>,
7777
) -> $S::TokenStream;
7878
fn into_trees(
7979
$self: $S::TokenStream
80-
) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>;
81-
},
82-
Ident {
83-
fn new(string: &str, span: $S::Span, is_raw: bool) -> $S::Ident;
84-
fn span($self: $S::Ident) -> $S::Span;
85-
fn with_span($self: $S::Ident, span: $S::Span) -> $S::Ident;
80+
) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>;
8681
},
8782
Literal {
8883
fn drop($self: $S::Literal);
@@ -146,6 +141,9 @@ macro_rules! with_api {
146141
fn save_span($self: $S::Span) -> usize;
147142
fn recover_proc_macro_span(id: usize) -> $S::Span;
148143
},
144+
Symbol {
145+
fn normalize_and_validate_ident(string: &str) -> Result<$S::Symbol, ()>;
146+
},
149147
}
150148
};
151149
}
@@ -170,6 +168,8 @@ macro_rules! reverse_decode {
170168
}
171169
}
172170

171+
#[allow(unsafe_code)]
172+
mod arena;
173173
#[allow(unsafe_code)]
174174
mod buffer;
175175
#[forbid(unsafe_code)]
@@ -189,6 +189,8 @@ mod scoped_cell;
189189
mod selfless_reify;
190190
#[forbid(unsafe_code)]
191191
pub mod server;
192+
#[allow(unsafe_code)]
193+
mod symbol;
192194

193195
use buffer::Buffer;
194196
pub use rpc::PanicMessage;
@@ -466,16 +468,25 @@ pub struct Punct<Span> {
466468

467469
compound_traits!(struct Punct<Span> { ch, joint, span });
468470

471+
/// An identifier token as carried by `TokenTree::Ident`, generic over the
/// `Span` and `Symbol` types in use. It bundles a symbol, a raw flag and a
/// span, and is `Copy` whenever both type parameters are.
#[derive(Copy, Clone, Eq, PartialEq)]
472+
pub struct Ident<Span, Symbol> {
473+
/// The symbol naming this identifier.
pub sym: Symbol,
474+
/// NOTE(review): presumably `true` for raw identifiers (`r#name`) — confirm
/// against the code that constructs this struct.
pub is_raw: bool,
475+
/// The span attached to this identifier.
pub span: Span,
476+
}
477+
478+
compound_traits!(struct Ident<Span, Symbol> { sym, is_raw, span });
479+
469480
#[derive(Clone)]
470-
pub enum TokenTree<TokenStream, Span, Ident, Literal> {
481+
pub enum TokenTree<TokenStream, Span, Symbol, Literal> {
471482
Group(Group<TokenStream, Span>),
472483
Punct(Punct<Span>),
473-
Ident(Ident),
484+
Ident(Ident<Span, Symbol>),
474485
Literal(Literal),
475486
}
476487

477488
compound_traits!(
478-
enum TokenTree<TokenStream, Span, Ident, Literal> {
489+
enum TokenTree<TokenStream, Span, Symbol, Literal> {
479490
Group(tt),
480491
Punct(tt),
481492
Ident(tt),

proc_macro/src/bridge/server.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ use super::client::HandleStore;
88
pub trait Types {
99
type FreeFunctions: 'static;
1010
type TokenStream: 'static + Clone;
11-
type Ident: 'static + Copy + Eq + Hash;
1211
type Literal: 'static + Clone;
1312
type SourceFile: 'static + Clone;
1413
type MultiSpan: 'static;
1514
type Diagnostic: 'static;
1615
type Span: 'static + Copy + Eq + Hash;
16+
type Symbol: 'static;
1717
}
1818

1919
/// Declare an associated fn of one of the traits below, adding necessary
@@ -38,6 +38,12 @@ macro_rules! declare_server_traits {
3838

3939
pub trait Server: Types $(+ $name)* {
4040
fn globals(&mut self) -> ExpnGlobals<Self::Span>;
41+
42+
/// Intern a symbol received from RPC
43+
fn intern_symbol(ident: &str) -> Self::Symbol;
44+
45+
/// Recover the string value of a symbol, and invoke a callback with it.
46+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str));
4147
}
4248
}
4349
}
@@ -49,6 +55,12 @@ impl<S: Server> Server for MarkedTypes<S> {
4955
fn globals(&mut self) -> ExpnGlobals<Self::Span> {
5056
<_>::mark(Server::globals(&mut self.0))
5157
}
58+
fn intern_symbol(ident: &str) -> Self::Symbol {
59+
<_>::mark(S::intern_symbol(ident))
60+
}
61+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
62+
S::with_symbol_string(symbol.unmark(), f)
63+
}
5264
}
5365

5466
macro_rules! define_mark_types_impls {
@@ -81,11 +93,13 @@ macro_rules! define_dispatcher_impl {
8193
pub trait DispatcherTrait {
8294
// HACK(eddyb) these are here to allow `Self::$name` to work below.
8395
$(type $name;)*
96+
8497
fn dispatch(&mut self, buf: Buffer) -> Buffer;
8598
}
8699

87100
impl<S: Server> DispatcherTrait for Dispatcher<MarkedTypes<S>> {
88101
$(type $name = <MarkedTypes<S> as Types>::$name;)*
102+
89103
fn dispatch(&mut self, mut buf: Buffer) -> Buffer {
90104
let Dispatcher { handle_store, server } = self;
91105

0 commit comments

Comments
 (0)