Skip to content

Commit 9bdf536

Browse files
Shard AllocMap Lock
This improves performance of many-seed parallel (-Zthreads=32) Miri executions: previously they managed to use only ~8 cores; with this change they use 27-28 cores. That's pretty reasonable scaling for the simplicity of this solution.
1 parent 7f36543 commit 9bdf536

File tree

2 files changed

+44
-25
lines changed

2 files changed

+44
-25
lines changed

compiler/rustc_middle/src/mir/interpret/mod.rs

+42-23
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ mod value;
1010

1111
use std::io::{Read, Write};
1212
use std::num::NonZero;
13+
use std::sync::atomic::AtomicU64;
1314
use std::{fmt, io};
1415

1516
use rustc_abi::{AddressSpace, Align, Endian, HasDataLayout, Size};
1617
use rustc_ast::{LitKind, Mutability};
1718
use rustc_data_structures::fx::FxHashMap;
19+
use rustc_data_structures::sharded::ShardedHashMap;
1820
use rustc_data_structures::sync::Lock;
1921
use rustc_hir::def::DefKind;
2022
use rustc_hir::def_id::{DefId, LocalDefId};
@@ -389,35 +391,39 @@ pub const CTFE_ALLOC_SALT: usize = 0;
389391

390392
pub(crate) struct AllocMap<'tcx> {
391393
/// Maps `AllocId`s to their corresponding allocations.
392-
alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,
394+
// Note that this map on rustc workloads seems to be rather dense, but in miri workloads should
395+
// be pretty sparse. In #136105 we considered replacing it with a (dense) Vec-based map, but
396+
// since there are workloads where it can be sparse we decided to go with sharding for now. At
397+
// least up to 32 cores the one workload tested didn't exhibit much difference between the two.
398+
//
399+
// Should be locked *after* locking dedup if locking both to avoid deadlocks.
400+
to_alloc: ShardedHashMap<AllocId, GlobalAlloc<'tcx>>,
393401

394402
/// Used to deduplicate global allocations: functions, vtables, string literals, ...
395403
///
396404
/// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
397405
/// the actual guarantees.
398-
dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,
406+
dedup: Lock<FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>>,
399407

400408
/// The `AllocId` to assign to the next requested ID.
401409
/// Always incremented; never gets smaller.
402-
next_id: AllocId,
410+
next_id: AtomicU64,
403411
}
404412

405413
impl<'tcx> AllocMap<'tcx> {
406414
pub(crate) fn new() -> Self {
407415
AllocMap {
408-
alloc_map: Default::default(),
416+
to_alloc: Default::default(),
409417
dedup: Default::default(),
410-
next_id: AllocId(NonZero::new(1).unwrap()),
418+
next_id: AtomicU64::new(1),
411419
}
412420
}
413-
fn reserve(&mut self) -> AllocId {
414-
let next = self.next_id;
415-
self.next_id.0 = self.next_id.0.checked_add(1).expect(
416-
"You overflowed a u64 by incrementing by 1... \
417-
You've just earned yourself a free drink if we ever meet. \
418-
Seriously, how did you do that?!",
419-
);
420-
next
421+
fn reserve(&self) -> AllocId {
422+
// Technically there is a window here where we overflow and then another thread
423+
// increments `next_id` *again* and uses it before we panic and tear down the entire session.
424+
// We consider this fine since such overflows cannot realistically occur.
425+
let next_id = self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
426+
AllocId(NonZero::new(next_id).unwrap())
421427
}
422428
}
423429

@@ -428,26 +434,34 @@ impl<'tcx> TyCtxt<'tcx> {
428434
/// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such
429435
/// an `AllocId` from a query.
430436
pub fn reserve_alloc_id(self) -> AllocId {
431-
self.alloc_map.lock().reserve()
437+
self.alloc_map.reserve()
432438
}
433439

434440
/// Reserves a new ID *if* this allocation has not been dedup-reserved before.
435441
/// Should not be used for mutable memory.
436442
fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
437-
let mut alloc_map = self.alloc_map.lock();
438443
if let GlobalAlloc::Memory(mem) = alloc {
439444
if mem.inner().mutability.is_mut() {
440445
bug!("trying to dedup-reserve mutable memory");
441446
}
442447
}
443448
let alloc_salt = (alloc, salt);
444-
if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
449+
// Locking this *before* `to_alloc` also to ensure correct lock order.
450+
let mut dedup = self.alloc_map.dedup.lock();
451+
if let Some(&alloc_id) = dedup.get(&alloc_salt) {
445452
return alloc_id;
446453
}
447-
let id = alloc_map.reserve();
454+
let id = self.alloc_map.reserve();
448455
debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
449-
alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
450-
alloc_map.dedup.insert(alloc_salt, id);
456+
let had_previous = self
457+
.alloc_map
458+
.to_alloc
459+
.lock_shard_by_value(&id)
460+
.insert(id, alloc_salt.0.clone())
461+
.is_some();
462+
// We just reserved, so should always be unique.
463+
assert!(!had_previous);
464+
dedup.insert(alloc_salt, id);
451465
id
452466
}
453467

@@ -497,7 +511,7 @@ impl<'tcx> TyCtxt<'tcx> {
497511
/// local dangling pointers and allocations in constants/statics.
498512
#[inline]
499513
pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> {
500-
self.alloc_map.lock().alloc_map.get(&id).cloned()
514+
self.alloc_map.to_alloc.lock_shard_by_value(&id).get(&id).cloned()
501515
}
502516

503517
#[inline]
@@ -516,16 +530,21 @@ impl<'tcx> TyCtxt<'tcx> {
516530
/// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to
517531
/// call this function twice, even with the same `Allocation` will ICE the compiler.
518532
pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
519-
if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) {
533+
if let Some(old) =
534+
self.alloc_map.to_alloc.lock_shard_by_value(&id).insert(id, GlobalAlloc::Memory(mem))
535+
{
520536
bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
521537
}
522538
}
523539

524540
/// Freezes an `AllocId` created with `reserve` by pointing it at a static item. Trying to
525541
/// call this function twice, even with the same `DefId` will ICE the compiler.
526542
pub fn set_nested_alloc_id_static(self, id: AllocId, def_id: LocalDefId) {
527-
if let Some(old) =
528-
self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Static(def_id.to_def_id()))
543+
if let Some(old) = self
544+
.alloc_map
545+
.to_alloc
546+
.lock_shard_by_value(&id)
547+
.insert(id, GlobalAlloc::Static(def_id.to_def_id()))
529548
{
530549
bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
531550
}

compiler/rustc_middle/src/ty/context.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1366,7 +1366,7 @@ pub struct GlobalCtxt<'tcx> {
13661366
pub data_layout: TargetDataLayout,
13671367

13681368
/// Stores memory for globals (statics/consts).
1369-
pub(crate) alloc_map: Lock<interpret::AllocMap<'tcx>>,
1369+
pub(crate) alloc_map: interpret::AllocMap<'tcx>,
13701370

13711371
current_gcx: CurrentGcx,
13721372
}
@@ -1583,7 +1583,7 @@ impl<'tcx> TyCtxt<'tcx> {
15831583
new_solver_evaluation_cache: Default::default(),
15841584
canonical_param_env_cache: Default::default(),
15851585
data_layout,
1586-
alloc_map: Lock::new(interpret::AllocMap::new()),
1586+
alloc_map: interpret::AllocMap::new(),
15871587
current_gcx,
15881588
});
15891589

0 commit comments

Comments
 (0)