Skip to content

Commit 896275c

Browse files
committed
XXX: add ByteSymbol
1 parent 99e7c15 commit 896275c

File tree

2 files changed

+224
-1
lines changed

2 files changed

+224
-1
lines changed

compiler/rustc_span/src/lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ mod span_encoding;
6666
pub use span_encoding::{DUMMY_SP, Span};
6767

6868
pub mod symbol;
69-
pub use symbol::{Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym};
69+
pub use symbol::{
70+
ByteSymbol, Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym,
71+
};
7072

7173
mod analyze_source_file;
7274
pub mod fatal_error;
@@ -101,6 +103,7 @@ mod tests;
101103
/// session.
102104
pub struct SessionGlobals {
103105
symbol_interner: symbol::Interner,
106+
byte_symbol_interner: symbol::ByteInterner,
104107
span_interner: Lock<span_encoding::SpanInterner>,
105108
/// Maps a macro argument token into use of the corresponding metavariable in the macro body.
106109
/// Collisions are possible and processed in `maybe_use_metavar_location` on best effort basis.
@@ -121,6 +124,7 @@ impl SessionGlobals {
121124
) -> SessionGlobals {
122125
SessionGlobals {
123126
symbol_interner: symbol::Interner::with_extra_symbols(extra_symbols),
127+
byte_symbol_interner: symbol::ByteInterner::default(),
124128
span_interner: Lock::new(span_encoding::SpanInterner::default()),
125129
metavar_spans: Default::default(),
126130
hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),

compiler/rustc_span/src/symbol.rs

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2850,3 +2850,222 @@ pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
28502850
})
28512851
.collect()
28522852
}
2853+
2854+
// TODO(njn): consider moving this to a `byte_symbol` module.
2855+
/// An interned byte string.
2856+
///
2857+
/// Internally, a `ByteSymbol` is implemented as an index, and all operations
2858+
/// (including hashing, equality, and ordering) operate on that index. The use
2859+
/// of `rustc_index::newtype_index!` means that `Option<ByteSymbol>` only takes up 4 bytes,
2860+
/// because `rustc_index::newtype_index!` reserves the last 256 values for tagging purposes.
2861+
///
2862+
/// Note that `ByteSymbol` is not itself a `rustc_index::newtype_index!` type: it
2863+
/// wraps the private `ByteSymbolIndex` instead, and implements `fmt::Debug` by
2864+
/// hand so that the interned bytes are printed rather than the raw index.
2865+
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
2866+
pub struct ByteSymbol(ByteSymbolIndex);
2867+
2868+
// Private index type wrapped by `ByteSymbol`. `ByteSymbol` derives
// `PartialOrd`/`Ord` over this index, which is presumably why `#[orderable]`
// is requested here.
rustc_index::newtype_index! {
2869+
#[orderable]
2870+
struct ByteSymbolIndex {}
2871+
}
2872+
2873+
impl ByteSymbol {
2874+
pub const fn new(n: u32) -> Self {
2875+
ByteSymbol(ByteSymbolIndex::from_u32(n))
2876+
}
2877+
2878+
/// Maps a string to its interned representation.
2879+
#[rustc_diagnostic_item = "ByteSymbolIntern"]
2880+
// njn: rename `string` variables as `byte_str`?
2881+
pub fn intern(string: &[u8]) -> Self {
2882+
with_session_globals(|session_globals| session_globals.byte_symbol_interner.intern(string))
2883+
}
2884+
2885+
/// Access the underlying string. This is a slowish operation because it
2886+
/// requires locking the symbol interner.
2887+
///
2888+
/// Note that the lifetime of the return value is a lie. It's not the same
2889+
/// as `&self`, but actually tied to the lifetime of the underlying
2890+
/// interner. Interners are long-lived, and there are very few of them, and
2891+
/// this function is typically used for short-lived things, so in practice
2892+
/// it works out ok.
2893+
/// njn: rename?
2894+
pub fn as_byte_str(&self) -> &[u8] {
2895+
with_session_globals(|session_globals| unsafe {
2896+
std::mem::transmute::<&[u8], &[u8]>(session_globals.byte_symbol_interner.get(*self))
2897+
})
2898+
}
2899+
2900+
pub fn as_u32(self) -> u32 {
2901+
self.0.as_u32()
2902+
}
2903+
2904+
// pub fn is_empty(self) -> bool {
2905+
// self == sym::empty
2906+
// }
2907+
}
2908+
2909+
// njn: needed?
2910+
impl fmt::Debug for ByteSymbol {
2911+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2912+
fmt::Debug::fmt(self.as_byte_str(), f)
2913+
}
2914+
}
2915+
2916+
// impl fmt::Display for Symbol {
2917+
// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2918+
// fmt::Display::fmt(self.as_str(), f)
2919+
// }
2920+
// }
2921+
2922+
// impl<CTX> HashStable<CTX> for Symbol {
2923+
// #[inline]
2924+
// fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
2925+
// self.as_str().hash_stable(hcx, hasher);
2926+
// }
2927+
// }
2928+
2929+
// impl<CTX> ToStableHashKey<CTX> for Symbol {
2930+
// type KeyType = String;
2931+
// #[inline]
2932+
// fn to_stable_hash_key(&self, _: &CTX) -> String {
2933+
// self.as_str().to_string()
2934+
// }
2935+
// }
2936+
2937+
// impl StableCompare for Symbol {
2938+
// const CAN_USE_UNSTABLE_SORT: bool = true;
2939+
2940+
// fn stable_cmp(&self, other: &Self) -> std::cmp::Ordering {
2941+
// self.as_str().cmp(other.as_str())
2942+
// }
2943+
// }
2944+
2945+
/// Interner for byte strings: a `Lock` around `ByteInternerInner`. It is stored
/// in `SessionGlobals` as `byte_symbol_interner` and is what
/// `ByteSymbol::intern` and `ByteSymbol::as_byte_str` go through.
#[derive(Default)]
2946+
pub(crate) struct ByteInterner(Lock<ByteInternerInner>);
2947+
2948+
// The state guarded by `ByteInterner`'s lock.
2949+
// The `&'static [u8]`s in this type actually point into the arena.
2950+
//
2951+
// This type is private to prevent accidentally constructing more than one
2952+
// `ByteInterner` on the same thread, which makes it easy to mix up `ByteSymbol`s
2953+
// between `ByteInterner`s.
2954+
// TODO(njn): could this be parameterized and shared with the string `Interner`?
2955+
#[derive(Default)]
2956+
struct ByteInternerInner {
2957+
arena: DroplessArena, // storage that `intern` copies new byte strings into
2958+
strings: FxIndexSet<&'static [u8]>, // position in this set is the `ByteSymbol` index; TODO(njn): rename?
2959+
}
2960+
2961+
impl ByteInterner {
2962+
// fn new(init: &[&'static str], extra: &[&'static str]) -> Self {
2963+
// let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
2964+
// assert_eq!(
2965+
// strings.len(),
2966+
// init.len() + extra.len(),
2967+
// "`init` or `extra` contain duplicate symbols",
2968+
// );
2969+
// Interner(Lock::new(ByteInternerInner { arena: Default::default(), strings }))
2970+
// }
2971+
2972+
// fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
2973+
// let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
2974+
// assert_eq!(
2975+
// strings.len(),
2976+
// init.len() + extra.len(),
2977+
// "`init` or `extra` contain duplicate symbols",
2978+
// );
2979+
// Interner(Lock::new(InternerInner { arena: Default::default(), strings }))
2980+
// }
2981+
2982+
#[inline]
2983+
fn intern(&self, string: &[u8]) -> ByteSymbol {
2984+
let mut inner = self.0.lock();
2985+
if let Some(idx) = inner.strings.get_index_of(string) {
2986+
return ByteSymbol::new(idx as u32);
2987+
}
2988+
2989+
let string: &[u8] = inner.arena.alloc_slice(string);
2990+
2991+
// SAFETY: we can extend the arena allocation to `'static` because we
2992+
// only access these while the arena is still alive.
2993+
let string: &'static [u8] = unsafe { &*(string as *const [u8]) };
2994+
2995+
// This second hash table lookup can be avoided by using `RawEntryMut`,
2996+
// but this code path isn't hot enough for it to be worth it. See
2997+
// #91445 for details.
2998+
let (idx, is_new) = inner.strings.insert_full(string);
2999+
debug_assert!(is_new); // due to the get_index_of check above
3000+
3001+
ByteSymbol::new(idx as u32)
3002+
}
3003+
3004+
/// Get the symbol as a string.
3005+
///
3006+
/// [`ByteSymbol::as_str()`] should be used in preference to this function.
3007+
/// // njn: rename as_str in that comment?
3008+
fn get(&self, symbol: ByteSymbol) -> &[u8] {
3009+
self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
3010+
}
3011+
}
3012+
3013+
impl Symbol {
3014+
// fn is_special(self) -> bool {
3015+
// self <= kw::Underscore
3016+
// }
3017+
3018+
// fn is_used_keyword_always(self) -> bool {
3019+
// self >= kw::As && self <= kw::While
3020+
// }
3021+
3022+
// fn is_unused_keyword_always(self) -> bool {
3023+
// self >= kw::Abstract && self <= kw::Yield
3024+
// }
3025+
3026+
// fn is_used_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
3027+
// (self >= kw::Async && self <= kw::Dyn) && edition() >= Edition::Edition2018
3028+
// }
3029+
3030+
// fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
3031+
// self == kw::Gen && edition().at_least_rust_2024()
3032+
// || self == kw::Try && edition().at_least_rust_2018()
3033+
// }
3034+
3035+
// pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
3036+
// self.is_special()
3037+
// || self.is_used_keyword_always()
3038+
// || self.is_unused_keyword_always()
3039+
// || self.is_used_keyword_conditional(edition)
3040+
// || self.is_unused_keyword_conditional(edition)
3041+
// }
3042+
3043+
// pub fn is_weak(self) -> bool {
3044+
// self >= kw::Auto && self <= kw::Yeet
3045+
// }
3046+
3047+
// /// A keyword or reserved identifier that can be used as a path segment.
3048+
// pub fn is_path_segment_keyword(self) -> bool {
3049+
// self == kw::Super
3050+
// || self == kw::SelfLower
3051+
// || self == kw::SelfUpper
3052+
// || self == kw::Crate
3053+
// || self == kw::PathRoot
3054+
// || self == kw::DollarCrate
3055+
// }
3056+
3057+
// /// Returns `true` if the symbol is `true` or `false`.
3058+
// pub fn is_bool_lit(self) -> bool {
3059+
// self == kw::True || self == kw::False
3060+
// }
3061+
3062+
// /// Returns `true` if this symbol can be a raw identifier.
3063+
// pub fn can_be_raw(self) -> bool {
3064+
// self != sym::empty && self != kw::Underscore && !self.is_path_segment_keyword()
3065+
// }
3066+
3067+
// /// Was this symbol predefined in the compiler's `symbols!` macro
3068+
// pub fn is_predefined(self) -> bool {
3069+
// self.as_u32() < PREDEFINED_SYMBOLS_COUNT
3070+
// }
3071+
}

0 commit comments

Comments
 (0)