[scudo] Add primary option to enable/disable cache blocks. #129794

Merged: 2 commits, Apr 17, 2025. The diff below shows the changes from all commits.
2 changes: 1 addition & 1 deletion compiler-rt/lib/scudo/standalone/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ set(SCUDO_HEADERS
internal_defs.h
linux.h
list.h
local_cache.h
memtag.h
mem_map.h
mem_map_base.h
@@ -90,6 +89,7 @@ set(SCUDO_HEADERS
report.h
report_linux.h
secondary.h
size_class_allocator.h
size_class_map.h
stack_depot.h
stats.h
4 changes: 4 additions & 0 deletions compiler-rt/lib/scudo/standalone/allocator_config.def
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ PRIMARY_REQUIRED(const s32, MaxReleaseToOsIntervalMs)

// PRIMARY_OPTIONAL(TYPE, NAME, DEFAULT)
//

// Enables/disables primary block caching. Batch class still caches.
PRIMARY_OPTIONAL(const bool, EnableBlockCache, true)

// The scale of a compact pointer. E.g., Ptr = Base + (CompactPtr << Scale).
PRIMARY_OPTIONAL(const uptr, CompactPtrScale, SCUDO_MIN_ALIGNMENT_LOG)

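The new option above is resolved at compile time: a config that does not define EnableBlockCache in its Primary section keeps the default of true recorded in the .def entry. The snippet below is a minimal, self-contained sketch of that kind of optional-with-default resolution; the GetEnableBlockCache helper and the two config structs are illustrative stand-ins, not scudo's actual detection machinery.

```cpp
#include <cstdio>
#include <type_traits>

// Primary template: the config does not declare the option, so fall back to
// the default recorded in the .def file (true).
template <typename Config, typename = void> struct GetEnableBlockCache {
  static constexpr bool value = true;
};

// Specialization chosen via SFINAE when Config::EnableBlockCache exists.
template <typename Config>
struct GetEnableBlockCache<Config,
                           std::void_t<decltype(Config::EnableBlockCache)>> {
  static constexpr bool value = Config::EnableBlockCache;
};

struct DefaultConfig {}; // no override: block caching stays enabled
struct NoCacheConfig {   // opts out of the per-thread block cache
  static constexpr bool EnableBlockCache = false;
};

int main() {
  std::printf("DefaultConfig EnableBlockCache = %d\n",
              GetEnableBlockCache<DefaultConfig>::value);
  std::printf("NoCacheConfig EnableBlockCache = %d\n",
              GetEnableBlockCache<NoCacheConfig>::value);
  return 0;
}
```

Compiled with -std=c++17, this prints 1 for the default config and 0 for the opt-out config.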
47 changes: 27 additions & 20 deletions compiler-rt/lib/scudo/standalone/combined.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
#include "common.h"
#include "flags.h"
#include "flags_parser.h"
#include "local_cache.h"
#include "mem_map.h"
#include "memtag.h"
#include "mutex.h"
#include "options.h"
#include "quarantine.h"
#include "report.h"
#include "secondary.h"
#include "size_class_allocator.h"
#include "stack_depot.h"
#include "string_utils.h"
#include "tsd.h"
@@ -54,7 +54,7 @@ class Allocator {
typename AllocatorConfig::template PrimaryT<PrimaryConfig<Config>>;
using SecondaryT =
typename AllocatorConfig::template SecondaryT<SecondaryConfig<Config>>;
using CacheT = typename PrimaryT::CacheT;
using SizeClassAllocatorT = typename PrimaryT::SizeClassAllocatorT;
typedef Allocator<Config, PostInitCallback> ThisT;
typedef typename AllocatorConfig::template TSDRegistryT<ThisT> TSDRegistryT;

@@ -63,8 +63,9 @@ class Allocator {
}

struct QuarantineCallback {
explicit QuarantineCallback(ThisT &Instance, CacheT &LocalCache)
: Allocator(Instance), Cache(LocalCache) {}
explicit QuarantineCallback(ThisT &Instance,
SizeClassAllocatorT &SizeClassAllocator)
: Allocator(Instance), SizeClassAllocator(SizeClassAllocator) {}

// Chunk recycling function, returns a quarantined chunk to the backend,
// first making sure it hasn't been tampered with.
@@ -80,7 +81,7 @@ class Allocator {
if (allocatorSupportsMemoryTagging<AllocatorConfig>())
Ptr = untagPointer(Ptr);
void *BlockBegin = Allocator::getBlockBegin(Ptr, &Header);
Cache.deallocate(Header.ClassId, BlockBegin);
SizeClassAllocator.deallocate(Header.ClassId, BlockBegin);
}

// We take a shortcut when allocating a quarantine batch by working with the
@@ -89,7 +90,7 @@ class Allocator {
void *allocate(UNUSED uptr Size) {
const uptr QuarantineClassId = SizeClassMap::getClassIdBySize(
sizeof(QuarantineBatch) + Chunk::getHeaderSize());
void *Ptr = Cache.allocate(QuarantineClassId);
void *Ptr = SizeClassAllocator.allocate(QuarantineClassId);
// Quarantine batch allocation failure is fatal.
if (UNLIKELY(!Ptr))
reportOutOfMemory(SizeClassMap::getSizeByClassId(QuarantineClassId));
@@ -126,14 +127,15 @@ class Allocator {

Header.State = Chunk::State::Available;
Chunk::storeHeader(Allocator.Cookie, Ptr, &Header);
Cache.deallocate(QuarantineClassId,
reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) -
Chunk::getHeaderSize()));
SizeClassAllocator.deallocate(
QuarantineClassId,
reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) -
Chunk::getHeaderSize()));
}

private:
ThisT &Allocator;
CacheT &Cache;
SizeClassAllocatorT &SizeClassAllocator;
};

typedef GlobalQuarantine<QuarantineCallback, void> QuarantineT;
@@ -263,7 +265,9 @@ class Allocator {
QuarantineT *getQuarantine() { return &Quarantine; }

// The Cache must be provided zero-initialized.
void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); }
void initAllocator(SizeClassAllocatorT *SizeClassAllocator) {
SizeClassAllocator->init(&Stats, &Primary);
}

// Release the resources used by a TSD, which involves:
// - draining the local quarantine cache to the global quarantine;
@@ -273,15 +277,16 @@
void commitBack(TSD<ThisT> *TSD) {
TSD->assertLocked(/*BypassCheck=*/true);
Quarantine.drain(&TSD->getQuarantineCache(),
QuarantineCallback(*this, TSD->getCache()));
TSD->getCache().destroy(&Stats);
QuarantineCallback(*this, TSD->getSizeClassAllocator()));
TSD->getSizeClassAllocator().destroy(&Stats);
}

void drainCache(TSD<ThisT> *TSD) {
TSD->assertLocked(/*BypassCheck=*/true);
Quarantine.drainAndRecycle(&TSD->getQuarantineCache(),
QuarantineCallback(*this, TSD->getCache()));
TSD->getCache().drain();
Quarantine.drainAndRecycle(
&TSD->getQuarantineCache(),
QuarantineCallback(*this, TSD->getSizeClassAllocator()));
TSD->getSizeClassAllocator().drain();
}
void drainCaches() { TSDRegistry.drainCaches(this); }

@@ -390,13 +395,13 @@ class Allocator {
ClassId = SizeClassMap::getClassIdBySize(NeededSize);
DCHECK_NE(ClassId, 0U);
typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
Block = TSD->getCache().allocate(ClassId);
Block = TSD->getSizeClassAllocator().allocate(ClassId);
// If the allocation failed, retry in each successively larger class until
// it fits. If it fails to fit in the largest class, fallback to the
// Secondary.
if (UNLIKELY(!Block)) {
while (ClassId < SizeClassMap::LargestClassId && !Block)
Block = TSD->getCache().allocate(++ClassId);
Block = TSD->getSizeClassAllocator().allocate(++ClassId);
if (!Block)
ClassId = 0;
}
@@ -1280,7 +1285,8 @@ class Allocator {
bool CacheDrained;
{
typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
CacheDrained = TSD->getCache().deallocate(ClassId, BlockBegin);
CacheDrained =
TSD->getSizeClassAllocator().deallocate(ClassId, BlockBegin);
}
// When we have drained some blocks back to the Primary from TSD, that
// implies that we may have the chance to release some pages as well.
@@ -1296,7 +1302,8 @@ class Allocator {
retagBlock(Options, TaggedPtr, Ptr, Header, Size, false);
typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
Quarantine.put(&TSD->getQuarantineCache(),
QuarantineCallback(*this, TSD->getCache()), Ptr, Size);
QuarantineCallback(*this, TSD->getSizeClassAllocator()),
Ptr, Size);
}
}

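The combined.h changes above are almost entirely mechanical renames (CacheT and getCache() become SizeClassAllocatorT and getSizeClassAllocator()), which only works because the caching and cache-less per-thread allocators expose the same interface. The toy program below sketches that shape; the two allocator structs are stand-ins, not scudo's real classes.

```cpp
#include <cstdio>

// Stand-in for SizeClassAllocatorLocalCache: blocks are batched per thread,
// so a deallocation usually does not drain anything back to the primary.
struct CachedSizeClassAllocator {
  void *allocate(unsigned ClassId) { (void)ClassId; return &Dummy; }
  bool deallocate(unsigned ClassId, void *Ptr) {
    (void)ClassId; (void)Ptr;
    return false; // nothing pushed back to the primary this time
  }
  int Dummy = 0;
};

// Stand-in for SizeClassAllocatorNoCache: every deallocation goes straight
// back to the primary, so it always reports a drain.
struct UncachedSizeClassAllocator {
  void *allocate(unsigned ClassId) { (void)ClassId; return &Dummy; }
  bool deallocate(unsigned ClassId, void *Ptr) {
    (void)ClassId; (void)Ptr;
    return true;
  }
  int Dummy = 0;
};

// Caller written against the common surface, mirroring the CacheDrained
// check in combined.h: when blocks were returned to the primary, the real
// allocator goes on to consider releasing pages to the OS.
template <class SizeClassAllocatorT>
void roundTrip(SizeClassAllocatorT &SizeClassAllocator, unsigned ClassId) {
  void *Block = SizeClassAllocator.allocate(ClassId);
  if (SizeClassAllocator.deallocate(ClassId, Block))
    std::printf("class %u: blocks drained back to the primary\n", ClassId);
  else
    std::printf("class %u: blocks kept in the local cache\n", ClassId);
}

int main() {
  CachedSizeClassAllocator Cached;
  UncachedSizeClassAllocator Uncached;
  roundTrip(Cached, 3);
  roundTrip(Uncached, 3);
  return 0;
}
```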
77 changes: 43 additions & 34 deletions compiler-rt/lib/scudo/standalone/primary32.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
#include "bytemap.h"
#include "common.h"
#include "list.h"
#include "local_cache.h"
#include "options.h"
#include "release.h"
#include "report.h"
#include "size_class_allocator.h"
#include "stats.h"
#include "string_utils.h"
#include "thread_annotations.h"
@@ -52,7 +52,10 @@ template <typename Config> class SizeClassAllocator32 {
static_assert((1UL << Config::getRegionSizeLog()) >= SizeClassMap::MaxSize,
"");
typedef SizeClassAllocator32<Config> ThisT;
typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
using SizeClassAllocatorT =
typename Conditional<Config::getEnableBlockCache(),
SizeClassAllocatorLocalCache<ThisT>,
SizeClassAllocatorNoCache<ThisT>>::type;
typedef TransferBatch<ThisT> TransferBatchT;
typedef BatchGroup<ThisT> BatchGroupT;

@@ -191,25 +194,28 @@ template <typename Config> class SizeClassAllocator32 {
return BlockSize > PageSize;
}

u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
const u16 MaxBlockCount) {
u16 popBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
CompactPtrT *ToArray, const u16 MaxBlockCount) {
DCHECK_LT(ClassId, NumClasses);
SizeClassInfo *Sci = getSizeClassInfo(ClassId);
ScopedLock L(Sci->Mutex);

u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
u16 PopCount =
popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray, MaxBlockCount);
if (UNLIKELY(PopCount == 0)) {
if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
if (UNLIKELY(!populateFreeList(SizeClassAllocator, ClassId, Sci)))
return 0U;
PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray,
MaxBlockCount);
DCHECK_NE(PopCount, 0U);
}

return PopCount;
}

// Push the array of free blocks to the designated batch group.
void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
void pushBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
CompactPtrT *Array, u32 Size) {
DCHECK_LT(ClassId, NumClasses);
DCHECK_GT(Size, 0);

@@ -240,7 +246,7 @@ template <typename Config> class SizeClassAllocator32 {
}

ScopedLock L(Sci->Mutex);
pushBlocksImpl(C, ClassId, Sci, Array, Size, SameGroup);
pushBlocksImpl(SizeClassAllocator, ClassId, Sci, Array, Size, SameGroup);
}

void disable() NO_THREAD_SAFETY_ANALYSIS {
@@ -529,8 +535,8 @@ template <typename Config> class SizeClassAllocator32 {
// memory group here.
BG->CompactPtrGroupBase = 0;
BG->BytesInBGAtLastCheckpoint = 0;
BG->MaxCachedPerBatch =
CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
BG->MaxCachedPerBatch = SizeClassAllocatorT::getMaxCached(
getSizeByClassId(SizeClassMap::BatchClassId));

Sci->FreeListInfo.BlockList.push_front(BG);
}
@@ -597,18 +603,18 @@ template <typename Config> class SizeClassAllocator32 {
// same group then we will skip checking the group id of each block.
//
// The region mutex needs to be held while calling this method.
void pushBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
CompactPtrT *Array, u32 Size, bool SameGroup = false)
REQUIRES(Sci->Mutex) {
void pushBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
SizeClassInfo *Sci, CompactPtrT *Array, u32 Size,
bool SameGroup = false) REQUIRES(Sci->Mutex) {
DCHECK_NE(ClassId, SizeClassMap::BatchClassId);
DCHECK_GT(Size, 0U);

auto CreateGroup = [&](uptr CompactPtrGroupBase) {
BatchGroupT *BG =
reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock());
BatchGroupT *BG = reinterpret_cast<BatchGroupT *>(
SizeClassAllocator->getBatchClassBlock());
BG->Batches.clear();
TransferBatchT *TB =
reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
SizeClassAllocator->getBatchClassBlock());
TB->clear();

BG->CompactPtrGroupBase = CompactPtrGroupBase;
@@ -629,8 +635,8 @@ template <typename Config> class SizeClassAllocator32 {
u16 UnusedSlots =
static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
if (UnusedSlots == 0) {
CurBatch =
reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
CurBatch = reinterpret_cast<TransferBatchT *>(
SizeClassAllocator->getBatchClassBlock());
CurBatch->clear();
Batches.push_front(CurBatch);
UnusedSlots = BG->MaxCachedPerBatch;
@@ -704,9 +710,9 @@ template <typename Config> class SizeClassAllocator32 {
InsertBlocks(Cur, Array + Size - Count, Count);
}

u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
CompactPtrT *ToArray, const u16 MaxBlockCount)
REQUIRES(Sci->Mutex) {
u16 popBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
SizeClassInfo *Sci, CompactPtrT *ToArray,
const u16 MaxBlockCount) REQUIRES(Sci->Mutex) {
if (Sci->FreeListInfo.BlockList.empty())
return 0U;

@@ -730,11 +736,11 @@
// So far, instead of always filling the blocks to `MaxBlockCount`, we only
// examine single `TransferBatch` to minimize the time spent on the primary
// allocator. Besides, the sizes of `TransferBatch` and
// `CacheT::getMaxCached()` may also impact the time spent on accessing the
// primary allocator.
// `SizeClassAllocatorT::getMaxCached()` may also impact the time spent on
// accessing the primary allocator.
// TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
// blocks and/or adjust the size of `TransferBatch` according to
// `CacheT::getMaxCached()`.
// `SizeClassAllocatorT::getMaxCached()`.
TransferBatchT *B = Batches.front();
DCHECK_NE(B, nullptr);
DCHECK_GT(B->getCount(), 0U);
@@ -754,7 +760,7 @@
// deallocate. Read the comment in `pushBatchClassBlocks()` for more
// details.
if (ClassId != SizeClassMap::BatchClassId)
C->deallocate(SizeClassMap::BatchClassId, B);
SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, B);

if (Batches.empty()) {
BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
@@ -766,15 +772,16 @@
// Which means, once we pop the last TransferBatch, the block is
// implicitly deallocated.
if (ClassId != SizeClassMap::BatchClassId)
C->deallocate(SizeClassMap::BatchClassId, BG);
SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, BG);
}
}

Sci->FreeListInfo.PoppedBlocks += PopCount;
return PopCount;
}

NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
NOINLINE bool populateFreeList(SizeClassAllocatorT *SizeClassAllocator,
uptr ClassId, SizeClassInfo *Sci)
REQUIRES(Sci->Mutex) {
uptr Region;
uptr Offset;
@@ -791,13 +798,13 @@
Region = allocateRegion(Sci, ClassId);
if (UNLIKELY(!Region))
return false;
C->getStats().add(StatMapped, RegionSize);
SizeClassAllocator->getStats().add(StatMapped, RegionSize);
Sci->CurrentRegion = Region;
Offset = 0;
}

const uptr Size = getSizeByClassId(ClassId);
const u16 MaxCount = CacheT::getMaxCached(Size);
const u16 MaxCount = SizeClassAllocatorT::getMaxCached(Size);
DCHECK_GT(MaxCount, 0U);
// The maximum number of blocks we should carve in the region is dictated
// by the maximum number of batches we want to fill, and the amount of
@@ -827,7 +834,8 @@ template <typename Config> class SizeClassAllocator32 {
for (u32 I = 1; I < NumberOfBlocks; I++) {
if (UNLIKELY(compactPtrGroupBase(ShuffleArray[I]) != CurGroup)) {
shuffle(ShuffleArray + I - N, N, &Sci->RandState);
pushBlocksImpl(C, ClassId, Sci, ShuffleArray + I - N, N,
pushBlocksImpl(SizeClassAllocator, ClassId, Sci, ShuffleArray + I - N,
N,
/*SameGroup=*/true);
N = 1;
CurGroup = compactPtrGroupBase(ShuffleArray[I]);
@@ -837,7 +845,8 @@
}

shuffle(ShuffleArray + NumberOfBlocks - N, N, &Sci->RandState);
pushBlocksImpl(C, ClassId, Sci, &ShuffleArray[NumberOfBlocks - N], N,
pushBlocksImpl(SizeClassAllocator, ClassId, Sci,
&ShuffleArray[NumberOfBlocks - N], N,
/*SameGroup=*/true);
} else {
pushBatchClassBlocks(Sci, ShuffleArray, NumberOfBlocks);
@@ -850,7 +859,7 @@
Sci->FreeListInfo.PushedBlocks -= NumberOfBlocks;

const uptr AllocatedUser = Size * NumberOfBlocks;
C->getStats().add(StatFree, AllocatedUser);
SizeClassAllocator->getStats().add(StatFree, AllocatedUser);
DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize);
// If there is not enough room in the region currently associated to fit
// more blocks, we deassociate the region by resetting CurrentRegion and
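Note how popBlocks() keeps the same control flow no matter which size-class allocator type is plugged in: pop from the per-class free list, refill it once via populateFreeList() on failure, then retry. The reduced, self-contained sketch below mirrors only that retry structure; the free-list container and block values are toys, not scudo's data structures.

```cpp
#include <cstdio>
#include <vector>

using CompactPtrT = unsigned long;

// Toy stand-in for the per-class free list guarded by Sci->Mutex.
static std::vector<CompactPtrT> FreeList;

// Toy stand-in for populateFreeList(): carve a few fresh "blocks".
static bool populateFreeList() {
  for (CompactPtrT Ptr = 1; Ptr <= 8; ++Ptr)
    FreeList.push_back(Ptr);
  return true;
}

// Toy stand-in for popBlocksImpl(): move up to MaxBlockCount blocks out.
static unsigned popBlocksImpl(CompactPtrT *ToArray, unsigned MaxBlockCount) {
  unsigned PopCount = 0;
  while (PopCount < MaxBlockCount && !FreeList.empty()) {
    ToArray[PopCount++] = FreeList.back();
    FreeList.pop_back();
  }
  return PopCount;
}

// Mirrors the retry logic in popBlocks(): a failed pop triggers exactly one
// refill attempt before giving up.
static unsigned popBlocks(CompactPtrT *ToArray, unsigned MaxBlockCount) {
  unsigned PopCount = popBlocksImpl(ToArray, MaxBlockCount);
  if (PopCount == 0) {
    if (!populateFreeList())
      return 0;
    PopCount = popBlocksImpl(ToArray, MaxBlockCount);
  }
  return PopCount;
}

int main() {
  CompactPtrT Blocks[4];
  std::printf("popped %u block(s)\n", popBlocks(Blocks, 4));
  return 0;
}
```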