@@ -2850,3 +2850,222 @@ pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
2850
2850
} )
2851
2851
. collect ( )
2852
2852
}
2853
+
2854
+ /// njn: update
2855
+ /// njn: could move this to byte_symbol module
2856
+ /// An interned string.
2857
+ ///
2858
+ /// Internally, a `Symbol` is implemented as an index, and all operations
2859
+ /// (including hashing, equality, and ordering) operate on that index. The use
2860
+ /// of `rustc_index::newtype_index!` means that `Option<Symbol>` only takes up 4 bytes,
2861
+ /// because `rustc_index::newtype_index!` reserves the last 256 values for tagging purposes.
2862
+ ///
2863
+ /// Note that `Symbol` cannot directly be a `rustc_index::newtype_index!` because it
2864
+ /// implements `fmt::Debug`, `Encodable`, and `Decodable` in special ways.
2865
+ #[ derive( Clone , Copy , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
2866
+ pub struct ByteSymbol ( ByteSymbolIndex ) ;
2867
+
2868
+ rustc_index:: newtype_index! {
2869
+ #[ orderable]
2870
+ struct ByteSymbolIndex { }
2871
+ }
2872
+
2873
+ impl ByteSymbol {
2874
+ pub const fn new ( n : u32 ) -> Self {
2875
+ ByteSymbol ( ByteSymbolIndex :: from_u32 ( n) )
2876
+ }
2877
+
2878
+ /// Maps a string to its interned representation.
2879
+ #[ rustc_diagnostic_item = "ByteSymbolIntern" ]
2880
+ // njn: rename `string` variables as `byte_str`?
2881
+ pub fn intern ( string : & [ u8 ] ) -> Self {
2882
+ with_session_globals ( |session_globals| session_globals. byte_symbol_interner . intern ( string) )
2883
+ }
2884
+
2885
+ /// Access the underlying string. This is a slowish operation because it
2886
+ /// requires locking the symbol interner.
2887
+ ///
2888
+ /// Note that the lifetime of the return value is a lie. It's not the same
2889
+ /// as `&self`, but actually tied to the lifetime of the underlying
2890
+ /// interner. Interners are long-lived, and there are very few of them, and
2891
+ /// this function is typically used for short-lived things, so in practice
2892
+ /// it works out ok.
2893
+ /// njn: rename?
2894
+ pub fn as_byte_str ( & self ) -> & [ u8 ] {
2895
+ with_session_globals ( |session_globals| unsafe {
2896
+ std:: mem:: transmute :: < & [ u8 ] , & [ u8 ] > ( session_globals. byte_symbol_interner . get ( * self ) )
2897
+ } )
2898
+ }
2899
+
2900
+ pub fn as_u32 ( self ) -> u32 {
2901
+ self . 0 . as_u32 ( )
2902
+ }
2903
+
2904
+ // pub fn is_empty(self) -> bool {
2905
+ // self == sym::empty
2906
+ // }
2907
+ }
2908
+
2909
+ // njn: needed?
2910
+ impl fmt:: Debug for ByteSymbol {
2911
+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
2912
+ fmt:: Debug :: fmt ( self . as_byte_str ( ) , f)
2913
+ }
2914
+ }
2915
+
2916
+ // impl fmt::Display for Symbol {
2917
+ // fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2918
+ // fmt::Display::fmt(self.as_str(), f)
2919
+ // }
2920
+ // }
2921
+
2922
+ // impl<CTX> HashStable<CTX> for Symbol {
2923
+ // #[inline]
2924
+ // fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
2925
+ // self.as_str().hash_stable(hcx, hasher);
2926
+ // }
2927
+ // }
2928
+
2929
+ // impl<CTX> ToStableHashKey<CTX> for Symbol {
2930
+ // type KeyType = String;
2931
+ // #[inline]
2932
+ // fn to_stable_hash_key(&self, _: &CTX) -> String {
2933
+ // self.as_str().to_string()
2934
+ // }
2935
+ // }
2936
+
2937
+ // impl StableCompare for Symbol {
2938
+ // const CAN_USE_UNSTABLE_SORT: bool = true;
2939
+
2940
+ // fn stable_cmp(&self, other: &Self) -> std::cmp::Ordering {
2941
+ // self.as_str().cmp(other.as_str())
2942
+ // }
2943
+ // }
2944
+
2945
+ #[ derive( Default ) ]
2946
+ pub ( crate ) struct ByteInterner ( Lock < ByteInternerInner > ) ;
2947
+
2948
+ // njn: update comment
2949
+ // The `&'static str`s in this type actually point into the arena.
2950
+ //
2951
+ // This type is private to prevent accidentally constructing more than one
2952
+ // `Interner` on the same thread, which makes it easy to mix up `Symbol`s
2953
+ // between `Interner`s.
2954
+ // njn: parameterize?
2955
+ #[ derive( Default ) ]
2956
+ struct ByteInternerInner {
2957
+ arena : DroplessArena ,
2958
+ strings : FxIndexSet < & ' static [ u8 ] > , // njn: rename?
2959
+ }
2960
+
2961
+ impl ByteInterner {
2962
+ // fn new(init: &[&'static str], extra: &[&'static str]) -> Self {
2963
+ // let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
2964
+ // assert_eq!(
2965
+ // strings.len(),
2966
+ // init.len() + extra.len(),
2967
+ // "`init` or `extra` contain duplicate symbols",
2968
+ // );
2969
+ // Interner(Lock::new(ByteInternerInner { arena: Default::default(), strings }))
2970
+ // }
2971
+
2972
+ // fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
2973
+ // let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
2974
+ // assert_eq!(
2975
+ // strings.len(),
2976
+ // init.len() + extra.len(),
2977
+ // "`init` or `extra` contain duplicate symbols",
2978
+ // );
2979
+ // Interner(Lock::new(InternerInner { arena: Default::default(), strings }))
2980
+ // }
2981
+
2982
+ #[ inline]
2983
+ fn intern ( & self , string : & [ u8 ] ) -> ByteSymbol {
2984
+ let mut inner = self . 0 . lock ( ) ;
2985
+ if let Some ( idx) = inner. strings . get_index_of ( string) {
2986
+ return ByteSymbol :: new ( idx as u32 ) ;
2987
+ }
2988
+
2989
+ let string: & [ u8 ] = inner. arena . alloc_slice ( string) ;
2990
+
2991
+ // SAFETY: we can extend the arena allocation to `'static` because we
2992
+ // only access these while the arena is still alive.
2993
+ let string: & ' static [ u8 ] = unsafe { & * ( string as * const [ u8 ] ) } ;
2994
+
2995
+ // This second hash table lookup can be avoided by using `RawEntryMut`,
2996
+ // but this code path isn't hot enough for it to be worth it. See
2997
+ // #91445 for details.
2998
+ let ( idx, is_new) = inner. strings . insert_full ( string) ;
2999
+ debug_assert ! ( is_new) ; // due to the get_index_of check above
3000
+
3001
+ ByteSymbol :: new ( idx as u32 )
3002
+ }
3003
+
3004
+ /// Get the symbol as a string.
3005
+ ///
3006
+ /// [`ByteSymbol::as_str()`] should be used in preference to this function.
3007
+ /// // njn: rename as_str in that comment?
3008
+ fn get ( & self , symbol : ByteSymbol ) -> & [ u8 ] {
3009
+ self . 0 . lock ( ) . strings . get_index ( symbol. 0 . as_usize ( ) ) . unwrap ( )
3010
+ }
3011
+ }
3012
+
3013
+ impl Symbol {
3014
+ // fn is_special(self) -> bool {
3015
+ // self <= kw::Underscore
3016
+ // }
3017
+
3018
+ // fn is_used_keyword_always(self) -> bool {
3019
+ // self >= kw::As && self <= kw::While
3020
+ // }
3021
+
3022
+ // fn is_unused_keyword_always(self) -> bool {
3023
+ // self >= kw::Abstract && self <= kw::Yield
3024
+ // }
3025
+
3026
+ // fn is_used_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
3027
+ // (self >= kw::Async && self <= kw::Dyn) && edition() >= Edition::Edition2018
3028
+ // }
3029
+
3030
+ // fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
3031
+ // self == kw::Gen && edition().at_least_rust_2024()
3032
+ // || self == kw::Try && edition().at_least_rust_2018()
3033
+ // }
3034
+
3035
+ // pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
3036
+ // self.is_special()
3037
+ // || self.is_used_keyword_always()
3038
+ // || self.is_unused_keyword_always()
3039
+ // || self.is_used_keyword_conditional(edition)
3040
+ // || self.is_unused_keyword_conditional(edition)
3041
+ // }
3042
+
3043
+ // pub fn is_weak(self) -> bool {
3044
+ // self >= kw::Auto && self <= kw::Yeet
3045
+ // }
3046
+
3047
+ // /// A keyword or reserved identifier that can be used as a path segment.
3048
+ // pub fn is_path_segment_keyword(self) -> bool {
3049
+ // self == kw::Super
3050
+ // || self == kw::SelfLower
3051
+ // || self == kw::SelfUpper
3052
+ // || self == kw::Crate
3053
+ // || self == kw::PathRoot
3054
+ // || self == kw::DollarCrate
3055
+ // }
3056
+
3057
+ // /// Returns `true` if the symbol is `true` or `false`.
3058
+ // pub fn is_bool_lit(self) -> bool {
3059
+ // self == kw::True || self == kw::False
3060
+ // }
3061
+
3062
+ // /// Returns `true` if this symbol can be a raw identifier.
3063
+ // pub fn can_be_raw(self) -> bool {
3064
+ // self != sym::empty && self != kw::Underscore && !self.is_path_segment_keyword()
3065
+ // }
3066
+
3067
+ // /// Was this symbol predefined in the compiler's `symbols!` macro
3068
+ // pub fn is_predefined(self) -> bool {
3069
+ // self.as_u32() < PREDEFINED_SYMBOLS_COUNT
3070
+ // }
3071
+ }
0 commit comments