Skip to content

Commit bd64b19

Browse files
committed
Split and rename Interner::get.
Most of the callers never pass a gensym, so this commit introduces a new function that doesn't look for that case. (It will panic with a bounds check failure if that case somehow arises.) The commit also adds comments that better explain `Symbol`, `InternedString` and `LocalInternedString`.
1 parent 3356887 commit bd64b19

File tree

2 files changed

+42
-16
lines changed

2 files changed

+42
-16
lines changed

src/librustc_codegen_llvm/debuginfo/metadata.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ impl TypeMap<'ll, 'tcx> {
170170
// the id is unknown.
171171
fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> &str {
172172
let UniqueTypeId(interner_key) = unique_type_id;
173-
self.unique_id_interner.get(interner_key)
173+
self.unique_id_interner.symbol_str(interner_key)
174174
}
175175

176176
// Get the UniqueTypeId for the given type. If the UniqueTypeId for the given
@@ -226,7 +226,7 @@ impl TypeMap<'ll, 'tcx> {
226226
let variant_part_type_id = format!("{}_variant_part",
227227
self.get_unique_type_id_as_string(enum_type_id));
228228
let interner_key = self.unique_id_interner.intern(&variant_part_type_id);
229-
self.unique_id_interner.get(interner_key)
229+
self.unique_id_interner.symbol_str(interner_key)
230230
}
231231
}
232232

src/libsyntax_pos/symbol.rs

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,18 @@ impl Decodable for Ident {
344344
}
345345
}
346346

347-
/// A symbol is an interned or gensymed string. The use of `newtype_index!` means
348-
/// that `Option<Symbol>` only takes up 4 bytes, because `newtype_index!` reserves
349-
/// the last 256 values for tagging purposes.
347+
/// A symbol is an interned or gensymed string. (A gensym is a special kind of
348+
/// symbol that is never equal to any other symbol. E.g.:
349+
/// - `Symbol::intern("x") == Symbol::intern("x")`
350+
/// - `Symbol::gensym("x") != Symbol::intern("x")`
351+
/// - `Symbol::gensym("x") != Symbol::gensym("x")`
352+
///
353+
/// Gensyms are useful when creating new identifiers that must not match any
354+
/// existing identifiers during macro expansion and syntax desugaring.)
355+
///
356+
/// The use of `newtype_index!` means that `Option<Symbol>` only takes up 4
357+
/// bytes, because `newtype_index!` reserves the last 256 values for tagging
358+
/// purposes.
350359
///
351360
/// Note that `Symbol` cannot directly be a `newtype_index!` because it implements
352361
/// `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
@@ -387,7 +396,7 @@ impl Symbol {
387396
pub fn as_str(self) -> LocalInternedString {
388397
with_interner(|interner| unsafe {
389398
LocalInternedString {
390-
string: std::mem::transmute::<&str, &str>(interner.get(self))
399+
string: std::mem::transmute::<&str, &str>(interner.symbol_or_gensym_str(self))
391400
}
392401
})
393402
}
@@ -510,7 +519,13 @@ impl Interner {
510519
symbol.0.as_usize() >= self.strings.len()
511520
}
512521

513-
pub fn get(&self, symbol: Symbol) -> &str {
522+
/// Get the chars of a normal (non-gensym) symbol. Panics with an out-of-bounds index if `symbol` is a gensym.
523+
pub fn symbol_str(&self, symbol: Symbol) -> &str {
524+
self.strings[symbol.0.as_usize()]
525+
}
526+
527+
/// Get the chars of a normal or gensym symbol.
528+
fn symbol_or_gensym_str(&self, symbol: Symbol) -> &str {
514529
match self.strings.get(symbol.0.as_usize()) {
515530
Some(string) => string,
516531
None => {
@@ -614,11 +629,17 @@ fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
614629
GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock()))
615630
}
616631

617-
/// Represents a string stored in the interner. Because the interner outlives any thread
618-
/// which uses this type, we can safely treat `string` which points to interner data,
619-
/// as an immortal string, as long as this type never crosses between threads.
620-
// FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`,
621-
// by creating a new thread right after constructing the interner.
632+
/// An alternative to `Symbol` and `LocalInternedString`, useful when the chars
633+
/// within the symbol need to be accessed. It is best used for temporary
634+
/// values.
635+
///
636+
/// Because the interner outlives any thread which uses this type, we can
637+
/// safely treat `string`, which points to interner data, as an immortal string,
638+
/// as long as this type never crosses between threads.
639+
//
640+
// FIXME: ensure that the interner outlives any thread which uses
641+
// `LocalInternedString`, by creating a new thread right after constructing the
642+
// interner.
622643
#[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)]
623644
pub struct LocalInternedString {
624645
string: &'static str,
@@ -711,7 +732,12 @@ impl Encodable for LocalInternedString {
711732
}
712733
}
713734

714-
/// Represents a string stored in the string interner.
735+
/// A thin wrapper around `Symbol`. It differs from `Symbol` in two main ways:
736+
/// - Its implementations of `Hash`, `PartialOrd` and `Ord` work with the
737+
/// string chars rather than the symbol integer. This is useful when
738+
/// hash stability is required across compile sessions, or a guaranteed sort
739+
/// ordering is required.
740+
/// - It is guaranteed to not be a gensym symbol.
715741
#[derive(Clone, Copy, Eq)]
716742
pub struct InternedString {
717743
symbol: Symbol,
@@ -720,7 +746,7 @@ pub struct InternedString {
720746
impl InternedString {
721747
pub fn with<F: FnOnce(&str) -> R, R>(self, f: F) -> R {
722748
let str = with_interner(|interner| {
723-
interner.get(self.symbol) as *const str
749+
interner.symbol_str(self.symbol) as *const str
724750
});
725751
// This is safe because the interner keeps string alive until it is dropped.
726752
// We can access it because we know the interner is still alive since we use a
@@ -730,8 +756,8 @@ impl InternedString {
730756

731757
pub fn with2<F: FnOnce(&str, &str) -> R, R>(self, other: &InternedString, f: F) -> R {
732758
let (self_str, other_str) = with_interner(|interner| {
733-
(interner.get(self.symbol) as *const str,
734-
interner.get(other.symbol) as *const str)
759+
(interner.symbol_str(self.symbol) as *const str,
760+
interner.symbol_str(other.symbol) as *const str)
735761
});
736762
// This is safe for the same reason that `with` is safe.
737763
unsafe { f(&*self_str, &*other_str) }

0 commit comments

Comments
 (0)