
Commit b2bff4f

Shard AllocMap Lock
This improves performance of many-seed parallel (-Zthreads=32) miri executions: previously they managed to use only ~8 cores, now they use 27-28 cores. That's pretty reasonable scaling for the simplicity of this solution.
1 parent 0df0662 · commit b2bff4f
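The gist of the change: the allocation map moves from a single global `Lock` to a sharded hash map, where each shard carries its own lock and a key's shard is chosen by hashing the key, so threads touching different `AllocId`s rarely contend. As a rough sketch of the technique only (this is not rustc's actual `rustc_data_structures::sharded::ShardedHashMap`; the shard count, hasher, and method names here are illustrative):

```rust
use std::collections::HashMap;
use std::collections::hash_map::RandomState;
use std::hash::{BuildHasher, Hash, Hasher};
use std::sync::Mutex;

// Illustrative shard count; the real ShardedHashMap chooses its own.
const SHARDS: usize = 32;

/// A minimal sharded map: one lock per shard instead of one global lock.
struct ShardedMap<K, V> {
    hasher: RandomState,
    shards: Vec<Mutex<HashMap<K, V>>>,
}

impl<K: Hash + Eq, V: Clone> ShardedMap<K, V> {
    fn new() -> Self {
        ShardedMap {
            hasher: RandomState::new(),
            shards: (0..SHARDS).map(|_| Mutex::new(HashMap::new())).collect(),
        }
    }

    /// Hash the key to select the shard that owns it.
    fn shard_for(&self, key: &K) -> &Mutex<HashMap<K, V>> {
        let mut h = self.hasher.build_hasher();
        key.hash(&mut h);
        &self.shards[h.finish() as usize % SHARDS]
    }

    /// Only the owning shard is locked; operations on other shards proceed in parallel.
    fn insert(&self, key: K, value: V) -> Option<V> {
        self.shard_for(&key).lock().unwrap().insert(key, value)
    }

    fn get(&self, key: &K) -> Option<V> {
        self.shard_for(key).lock().unwrap().get(key).cloned()
    }
}
```

In the diff below, `lock_shard_by_value(&id)` plays the role of `shard_for(&id).lock()` in this sketch.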

File tree

compiler/rustc_middle/src/mir/interpret/mod.rs
compiler/rustc_middle/src/ty/context.rs

2 files changed: +41 −25 lines changed

compiler/rustc_middle/src/mir/interpret/mod.rs (+39 −23)
```diff
@@ -10,11 +10,13 @@ mod value;
 use std::io::{Read, Write};
 use std::num::NonZero;
+use std::sync::atomic::AtomicU64;
 use std::{fmt, io};
 
 use rustc_abi::{AddressSpace, Align, Endian, HasDataLayout, Size};
 use rustc_ast::{LitKind, Mutability};
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::sharded::ShardedHashMap;
 use rustc_data_structures::sync::Lock;
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::{DefId, LocalDefId};
```
```diff
@@ -389,35 +391,37 @@ pub const CTFE_ALLOC_SALT: usize = 0;
 
 pub(crate) struct AllocMap<'tcx> {
     /// Maps `AllocId`s to their corresponding allocations.
-    alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,
+    // Note that this map on rustc workloads seems to be rather dense. In #136105 we considered
+    // replacing it with a (dense) Vec-based map, but since there are workloads where it can be
+    // sparse we decided to go with sharding for now. At least up to 32 cores there doesn't seem to
+    // be much difference in performance on the one parallel workload we tested.
+    to_alloc: ShardedHashMap<AllocId, GlobalAlloc<'tcx>>,
 
     /// Used to deduplicate global allocations: functions, vtables, string literals, ...
     ///
     /// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
     /// the actual guarantees.
-    dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,
+    dedup: Lock<FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>>,
 
     /// The `AllocId` to assign to the next requested ID.
     /// Always incremented; never gets smaller.
-    next_id: AllocId,
+    next_id: AtomicU64,
 }
 
 impl<'tcx> AllocMap<'tcx> {
     pub(crate) fn new() -> Self {
         AllocMap {
-            alloc_map: Default::default(),
+            to_alloc: Default::default(),
             dedup: Default::default(),
-            next_id: AllocId(NonZero::new(1).unwrap()),
+            next_id: AtomicU64::new(1),
         }
     }
-    fn reserve(&mut self) -> AllocId {
-        let next = self.next_id;
-        self.next_id.0 = self.next_id.0.checked_add(1).expect(
-            "You overflowed a u64 by incrementing by 1... \
-             You've just earned yourself a free drink if we ever meet. \
-             Seriously, how did you do that?!",
-        );
-        next
+    fn reserve(&self) -> AllocId {
+        // Technically there is a window here where we overflow and then another thread
+        // increments `next_id` *again* and uses it before we panic and tear down the entire session.
+        // We consider this fine since such overflows cannot realistically occur.
+        let next_id = self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        AllocId(NonZero::new(next_id).unwrap())
     }
 }
```
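With the table sharded, reserving an ID no longer needs any lock at all: `next_id` becomes an `AtomicU64` and `reserve` takes `&self`. A self-contained sketch of the same pattern, using hypothetical `Id`/`IdAllocator` names in place of the real types:

```rust
use std::num::NonZero;
use std::sync::atomic::{AtomicU64, Ordering};

/// Hypothetical stand-in for rustc's `AllocId`.
#[derive(Debug, Clone, Copy)]
struct Id(NonZero<u64>);

/// Lock-free ID allocator in the style of the new `AllocMap::reserve`.
struct IdAllocator {
    // Starts at 1 so every handed-out value is a valid NonZero<u64>.
    next_id: AtomicU64,
}

impl IdAllocator {
    fn new() -> Self {
        IdAllocator { next_id: AtomicU64::new(1) }
    }

    /// `fetch_add` returns the value *before* the increment, so each caller
    /// observes a distinct value. `Relaxed` is enough here: the counter only
    /// has to be unique, it does not order any other memory accesses.
    fn reserve(&self) -> Id {
        let next = self.next_id.fetch_add(1, Ordering::Relaxed);
        // As the diff's comment notes, a wrap-around to 0 would be caught by
        // this unwrap, but a u64 overflow is unrealistic in practice.
        Id(NonZero::new(next).unwrap())
    }
}
```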

```diff
@@ -428,26 +432,33 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such
     /// an `AllocId` from a query.
     pub fn reserve_alloc_id(self) -> AllocId {
-        self.alloc_map.lock().reserve()
+        self.alloc_map.reserve()
     }
 
     /// Reserves a new ID *if* this allocation has not been dedup-reserved before.
     /// Should not be used for mutable memory.
     fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
-        let mut alloc_map = self.alloc_map.lock();
         if let GlobalAlloc::Memory(mem) = alloc {
             if mem.inner().mutability.is_mut() {
                 bug!("trying to dedup-reserve mutable memory");
             }
         }
         let alloc_salt = (alloc, salt);
-        if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
+        let mut dedup = self.alloc_map.dedup.lock();
+        if let Some(&alloc_id) = dedup.get(&alloc_salt) {
             return alloc_id;
         }
-        let id = alloc_map.reserve();
+        let id = self.alloc_map.reserve();
         debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
-        alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
-        alloc_map.dedup.insert(alloc_salt, id);
+        // We just reserved, so should always be unique.
+        assert!(
+            self.alloc_map
+                .to_alloc
+                .lock_shard_by_value(&id)
+                .insert(id, alloc_salt.0.clone())
+                .is_none()
+        );
+        dedup.insert(alloc_salt, id);
         id
     }
```
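Note the locking discipline above: the `dedup` lock is held across the whole check-reserve-insert sequence, so two threads interning the same value cannot both miss the cache and mint two IDs, while the insert into `to_alloc` only locks the single shard owning the freshly reserved ID. A simplified sketch of that discipline (hypothetical `intern` helper, with plain std types standing in for rustc's `Lock` and `ShardedHashMap`):

```rust
use std::collections::HashMap;
use std::sync::Mutex;

/// Hypothetical simplification of `reserve_and_set_dedup`'s locking discipline.
fn intern(
    dedup: &Mutex<HashMap<String, u64>>,
    reserve: impl Fn() -> u64,   // lock-free, like AllocMap::reserve
    store: impl Fn(u64, String), // locks one shard, like lock_shard_by_value
    value: String,
) -> u64 {
    // Hold the dedup lock for the whole check-reserve-insert sequence so two
    // threads interning the same value cannot both miss and create two IDs.
    let mut dedup = dedup.lock().unwrap();
    if let Some(&id) = dedup.get(&value) {
        return id;
    }
    let id = reserve();
    store(id, value.clone());
    dedup.insert(value, id);
    id
}
```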

```diff
@@ -497,7 +508,7 @@ impl<'tcx> TyCtxt<'tcx> {
     /// local dangling pointers and allocations in constants/statics.
     #[inline]
     pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> {
-        self.alloc_map.lock().alloc_map.get(&id).cloned()
+        self.alloc_map.to_alloc.lock_shard_by_value(&id).get(&id).cloned()
     }
 
     #[inline]
```
```diff
@@ -516,16 +527,21 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to
     /// call this function twice, even with the same `Allocation` will ICE the compiler.
     pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
-        if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) {
+        if let Some(old) =
+            self.alloc_map.to_alloc.lock_shard_by_value(&id).insert(id, GlobalAlloc::Memory(mem))
+        {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
     }
 
     /// Freezes an `AllocId` created with `reserve` by pointing it at a static item. Trying to
     /// call this function twice, even with the same `DefId` will ICE the compiler.
     pub fn set_nested_alloc_id_static(self, id: AllocId, def_id: LocalDefId) {
-        if let Some(old) =
-            self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Static(def_id.to_def_id()))
+        if let Some(old) = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, GlobalAlloc::Static(def_id.to_def_id()))
         {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
```

compiler/rustc_middle/src/ty/context.rs (+2 −2)
```diff
@@ -1348,7 +1348,7 @@ pub struct GlobalCtxt<'tcx> {
     pub data_layout: TargetDataLayout,
 
     /// Stores memory for globals (statics/consts).
-    pub(crate) alloc_map: Lock<interpret::AllocMap<'tcx>>,
+    pub(crate) alloc_map: interpret::AllocMap<'tcx>,
 }
 
 /// This is used to get a reference to a `GlobalCtxt` if one is available.
```
```diff
@@ -1538,7 +1538,7 @@ impl<'tcx> TyCtxt<'tcx> {
             new_solver_evaluation_cache: Default::default(),
             canonical_param_env_cache: Default::default(),
             data_layout,
-            alloc_map: Lock::new(interpret::AllocMap::new()),
+            alloc_map: interpret::AllocMap::new(),
         });
 
         let icx = tls::ImplicitCtxt::new(&gcx);
```
