Skip to content

bootstrap: use internment instead of hand-rolled interning #128289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/bootstrap/Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ dependencies = [
"memchr",
]

[[package]]
name = "allocator-api2"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611cc2ae7d2e242c457e4be7f97036b8ad9ca152b499f53faf99b1ed8fc2553f"

[[package]]
name = "anstyle"
version = "1.0.8"
Expand Down Expand Up @@ -44,6 +50,7 @@ dependencies = [
"fd-lock",
"home",
"ignore",
"internment",
"junction",
"libc",
"object",
Expand Down Expand Up @@ -219,6 +226,12 @@ dependencies = [
"crypto-common",
]

[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"

[[package]]
name = "errno"
version = "0.3.9"
Expand Down Expand Up @@ -252,6 +265,12 @@ dependencies = [
"windows-sys 0.59.0",
]

[[package]]
name = "foldhash"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"

[[package]]
name = "generic-array"
version = "0.14.7"
Expand All @@ -275,6 +294,17 @@ dependencies = [
"regex-syntax",
]

[[package]]
name = "hashbrown"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]

[[package]]
name = "heck"
version = "0.5.0"
Expand Down Expand Up @@ -306,6 +336,15 @@ dependencies = [
"winapi-util",
]

[[package]]
name = "internment"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "636d4b0f6a39fd684effe2a73f5310df16a3fa7954c26d36833e98f44d1977a2"
dependencies = [
"hashbrown",
]

[[package]]
name = "itoa"
version = "1.0.11"
Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ clap = { version = "4.4", default-features = false, features = ["std", "usage",
clap_complete = "4.4"
fd-lock = "4.0"
home = "0.5"
internment = "0.8.5"
ignore = "0.4"
libc = "0.2"
object = { version = "0.36.3", default-features = false, features = ["archive", "coff", "read_core", "unaligned"] }
Expand Down
30 changes: 16 additions & 14 deletions src/bootstrap/src/core/config/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::core::build_steps::llvm;
pub use crate::core::config::flags::Subcommand;
use crate::core::config::flags::{Color, Flags, Warnings};
use crate::core::download::is_download_ci_available;
use crate::utils::cache::{INTERNER, Interned};
use crate::utils::cache::Interned;
use crate::utils::channel::{self, GitInfo};
use crate::utils::helpers::{self, exe, output, t};

Expand Down Expand Up @@ -465,15 +465,21 @@ impl std::str::FromStr for RustcLto {
}
}

#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
// N.B.: This type is used everywhere, and the entire codebase relies on it being Copy.
// Making it !Copy is highly nontrivial!
pub struct TargetSelection {
pub triple: Interned<String>,
file: Option<Interned<String>>,
pub triple: Interned<str>,
file: Option<Interned<str>>,
synthetic: bool,
}

/// Default `TargetSelection`: an empty interned triple, with `file` and
/// `synthetic` left at their own type defaults (`None` and `false`).
impl Default for TargetSelection {
fn default() -> Self {
// `"".into()` builds the interned `triple` from an empty string slice.
Self { triple: "".into(), file: Default::default(), synthetic: Default::default() }
}
}

/// Newtype over `Vec<TargetSelection>` so we can implement custom parsing logic
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub struct TargetSelectionList(Vec<TargetSelection>);
Expand All @@ -500,18 +506,14 @@ impl TargetSelection {
(selection, None)
};

let triple = INTERNER.intern_str(triple);
let file = file.map(|f| INTERNER.intern_str(f));
let triple: Interned<str> = triple.into();
let file: Option<Interned<str>> = file.map(|f| f.into());

Self { triple, file, synthetic: false }
}

pub fn create_synthetic(triple: &str, file: &str) -> Self {
Self {
triple: INTERNER.intern_str(triple),
file: Some(INTERNER.intern_str(file)),
synthetic: true,
}
Self { triple: triple.into(), file: Some(file.into()), synthetic: true }
}

pub fn rustc_target_arg(&self) -> &str {
Expand Down Expand Up @@ -571,15 +573,15 @@ impl fmt::Debug for TargetSelection {

impl PartialEq<&str> for TargetSelection {
fn eq(&self, other: &&str) -> bool {
self.triple == *other
&*self.triple == *other
}
}

// Targets are often used as directory names throughout bootstrap.
// This impl makes it more ergonomic to use them as such.
impl AsRef<Path> for TargetSelection {
fn as_ref(&self) -> &Path {
self.triple.as_ref()
(*self.triple).as_ref()
}
}

Expand Down Expand Up @@ -2119,7 +2121,7 @@ impl Config {
// thus, disabled
// - similarly, lld will not be built nor used by default when explicitly asked not to, e.g.
// when the config sets `rust.lld = false`
if config.build.triple == "x86_64-unknown-linux-gnu"
if &*config.build.triple == "x86_64-unknown-linux-gnu"
&& config.hosts == [config.build]
&& (config.channel == "dev" || config.channel == "nightly")
{
Expand Down
190 changes: 1 addition & 189 deletions src/bootstrap/src/utils/cache.rs
Original file line number Diff line number Diff line change
@@ -1,198 +1,10 @@
use std::any::{Any, TypeId};
use std::borrow::Borrow;
use std::cell::RefCell;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use std::ops::Deref;
use std::path::PathBuf;
use std::sync::{LazyLock, Mutex};
use std::{fmt, mem};

use crate::core::builder::Step;

/// Handle to a value of type `T` stored in an intern table.
///
/// The `usize` is the value's position in `TyIntern::items`: it is taken from
/// `items.len()` when the value is first interned and used as the index in
/// `TyIntern::get`. `PhantomData<*const T>` ties the handle to `T` without
/// storing a `T`.
pub struct Interned<T>(usize, PhantomData<*const T>);

/// The default handle is the interned form of `T::default()`.
impl<T: Internable + Default> Default for Interned<T> {
fn default() -> Self {
// `Internable::intern` locks `T`'s intern cache and inserts the value if needed.
T::default().intern()
}
}

// `Copy` and `Clone` are written by hand so they carry no bound on `T`;
// a derive would require `T: Copy` / `T: Clone`.
impl<T> Copy for Interned<T> {}
impl<T> Clone for Interned<T> {
/// Returns a bitwise copy of the handle; the interned value is not cloned.
fn clone(&self) -> Interned<T> {
*self
}
}

/// Two handles are equal when their indices are equal.
///
/// `TyIntern::intern` and `TyIntern::intern_borrow` return the existing handle
/// for a value that is already in the table, so within one table equal values
/// get equal indices and the stored values need not be inspected.
impl<T> PartialEq for Interned<T> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<T> Eq for Interned<T> {}

/// Compares an interned string with a `str`.
impl PartialEq<str> for Interned<String> {
fn eq(&self, other: &str) -> bool {
// `*self` is an `Interned<String>` and `other` is a `&str`, so this
// resolves to the `PartialEq<&str>` impl rather than recursing.
*self == other
}
}
/// Compares an interned string with a `&str` by string contents.
impl PartialEq<&str> for Interned<String> {
fn eq(&self, other: &&str) -> bool {
// Both sides are dereferenced down to `str`; the left side goes through
// `Interned`'s `Deref` impl, which locks the intern cache.
**self == **other
}
}
// Mixed value/reference comparisons, so `a == &b` and `&a == b` compile
// without explicit (de)referencing. Both compare indices only.
impl<T> PartialEq<&Interned<T>> for Interned<T> {
fn eq(&self, other: &&Self) -> bool {
self.0 == other.0
}
}
impl<T> PartialEq<Interned<T>> for &Interned<T> {
fn eq(&self, other: &Interned<T>) -> bool {
self.0 == other.0
}
}

// The `PhantomData<*const T>` field makes `Interned<T>` neither `Send` nor
// `Sync` automatically (raw pointers are neither), so both are asserted here.
// NOTE(review): presumably sound because the handle holds only a `usize` index
// and all access to the stored value goes through a `Mutex` — confirm.
unsafe impl<T> Send for Interned<T> {}
unsafe impl<T> Sync for Interned<T> {}

/// Displays an interned string as its underlying text.
impl fmt::Display for Interned<String> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Deref-coerce the handle to `&str` (via the `Deref` impl), then write it verbatim.
let s: &str = self;
f.write_str(s)
}
}

/// Debug-formats the value the handle dereferences to, not the index.
///
/// Available for any `Interned<T>` whose `Deref::Target` is a `Debug` type `U`.
impl<T, U: ?Sized + fmt::Debug> fmt::Debug for Interned<T>
where
Self: Deref<Target = U>,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Deref-coerce to the target and delegate to its `Debug` output.
let s: &U = self;
f.write_fmt(format_args!("{s:?}"))
}
}

/// Hashes the stored value rather than the index.
///
/// Each call locks `T`'s intern cache; `unwrap` panics if that mutex is poisoned.
impl<T: Internable + Hash> Hash for Interned<T> {
fn hash<H: Hasher>(&self, state: &mut H) {
let l = T::intern_cache().lock().unwrap();
l.get(*self).hash(state)
}
}

/// Dereferences the handle to the target of the stored value
/// (e.g. `str` for `Interned<String>`).
impl<T: Internable + Deref> Deref for Interned<T> {
type Target = T::Target;
fn deref(&self) -> &Self::Target {
// Locks `T`'s intern cache; `unwrap` panics if the mutex is poisoned.
let l = T::intern_cache().lock().unwrap();
// The `transmute` detaches the returned reference's lifetime from the
// lock guard `l`, which is dropped when this function returns.
// NOTE(review): presumably sound because no `TyIntern` method shown here
// removes entries from `items`, and the deref target lives in its own
// allocation that is not moved when `items` grows — confirm for every
// `Internable` type.
unsafe { mem::transmute::<&Self::Target, &Self::Target>(l.get(*self)) }
}
}

/// Forwards `AsRef<U>` to the stored value.
impl<T: Internable + AsRef<U>, U: ?Sized> AsRef<U> for Interned<T> {
fn as_ref(&self) -> &U {
// Locks `T`'s intern cache; `unwrap` panics if the mutex is poisoned.
let l = T::intern_cache().lock().unwrap();
// The `transmute` detaches the reference's lifetime from the lock guard `l`.
// NOTE(review): presumably relies on entries never being removed and on
// `as_ref()` pointing at data that does not move when `items`
// reallocates — confirm for every `Internable` type.
unsafe { mem::transmute::<&U, &U>(l.get(*self).as_ref()) }
}
}

/// Orders handles by their stored values, not by index.
///
/// Both values are read under a single lock of `T`'s intern cache.
impl<T: Internable + PartialOrd> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
let l = T::intern_cache().lock().unwrap();
l.get(*self).partial_cmp(l.get(*other))
}
}

/// Total ordering by stored value; see the `PartialOrd` impl.
impl<T: Internable + Ord> Ord for Interned<T> {
fn cmp(&self, other: &Self) -> Ordering {
let l = T::intern_cache().lock().unwrap();
l.get(*self).cmp(l.get(*other))
}
}

/// Intern table for a single type `T`.
struct TyIntern<T: Clone + Eq> {
// Stored values; an `Interned<T>`'s index refers to a position in this vector.
items: Vec<T>,
// Reverse lookup from a value to the handle already assigned to it.
set: HashMap<T, Interned<T>>,
}

/// Creates an empty table.
impl<T: Hash + Clone + Eq> Default for TyIntern<T> {
fn default() -> Self {
TyIntern { items: Vec::new(), set: Default::default() }
}
}

impl<T: Hash + Clone + Eq> TyIntern<T> {
/// Interns a borrowed form `B` of `T` (e.g. `str` for `String`).
///
/// Returns the existing handle if the value is already in the table, so the
/// owned value is only created on the first insertion.
fn intern_borrow<B>(&mut self, item: &B) -> Interned<T>
where
B: Eq + Hash + ToOwned<Owned = T> + ?Sized,
T: Borrow<B>,
{
if let Some(i) = self.set.get(item) {
return *i;
}
let item = item.to_owned();
// The new handle's index is the position the value is about to occupy in `items`.
let interned = Interned(self.items.len(), PhantomData::<*const T>);
// One copy is stored as the map key, the other in `items`.
self.set.insert(item.clone(), interned);
self.items.push(item);
interned
}

/// Interns an owned value, returning the existing handle if it is already in the table.
fn intern(&mut self, item: T) -> Interned<T> {
if let Some(i) = self.set.get(&item) {
return *i;
}
// The new handle's index is the position the value is about to occupy in `items`.
let interned = Interned(self.items.len(), PhantomData::<*const T>);
self.set.insert(item.clone(), interned);
self.items.push(item);
interned
}

/// Returns the stored value for a handle.
///
/// Indexes `items` directly, so it panics if the index is out of bounds.
fn get(&self, i: Interned<T>) -> &T {
&self.items[i.0]
}
}

/// Bundle of intern tables, one per supported type, each behind its own `Mutex`.
#[derive(Default)]
pub struct Interner {
// Table for `String` values.
strs: Mutex<TyIntern<String>>,
// Table for `PathBuf` values.
paths: Mutex<TyIntern<PathBuf>>,
// Table for `Vec<String>` values.
lists: Mutex<TyIntern<Vec<String>>>,
}

/// Types that have an intern table with `'static` lifetime.
trait Internable: Clone + Eq + Hash + 'static {
/// Returns the mutex-guarded table that stores values of this type.
fn intern_cache() -> &'static Mutex<TyIntern<Self>>;

/// Interns `self` in its table and returns the handle.
///
/// Panics if the table's mutex is poisoned.
fn intern(self) -> Interned<Self> {
Self::intern_cache().lock().unwrap().intern(self)
}
}

// Each impl below points a type at its field of the global `INTERNER`.

/// `String` values are stored in `INTERNER.strs`.
impl Internable for String {
fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
&INTERNER.strs
}
}

/// `PathBuf` values are stored in `INTERNER.paths`.
impl Internable for PathBuf {
fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
&INTERNER.paths
}
}

/// `Vec<String>` values are stored in `INTERNER.lists`.
impl Internable for Vec<String> {
fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
&INTERNER.lists
}
}

impl Interner {
/// Interns a string slice and returns its handle.
///
/// Goes through `TyIntern::intern_borrow`, so a `String` is only created
/// when `s` is not already in the table. Panics if the `strs` mutex is poisoned.
pub fn intern_str(&self, s: &str) -> Interned<String> {
self.strs.lock().unwrap().intern_borrow(s)
}
}

/// Global interner; `LazyLock` builds it with `Interner::default` on first access.
pub static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::default);
/// `Interned<T>` is an alias for `internment::Intern<T>`.
pub type Interned<T> = internment::Intern<T>;

/// This is essentially a `HashMap` which allows storing any type in its input and
/// any type in its output. It is a write-once cache; values are never evicted,
Expand Down
Loading