[scudo] Add primary option to enable/disable cache blocks. #129794

Merged
merged 2 commits into llvm:main from no_cache
Apr 17, 2025

Conversation

cferris1000
Contributor

When caching is disabled via this option, no primary blocks are cached except for the batch class. Nothing else changes; in particular, the page-releasing strategy is unaffected.

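For context, a config opts out of caching through the new PRIMARY_OPTIONAL knob added to allocator_config.def in this patch. Below is a minimal sketch of how a custom config's Primary section might set it; the struct name and the elided fields are illustrative, only the EnableCache line comes from this patch.

```cpp
// Illustrative config fragment, not code from this patch: only EnableCache
// is the new knob; the remaining Primary parameters follow the existing
// PRIMARY_REQUIRED/PRIMARY_OPTIONAL entries in allocator_config.def.
struct ExampleNoCacheConfig {
  struct Primary {
    using SizeClassMap = scudo::DefaultSizeClassMap;
    // New option: do not cache primary blocks in the per-thread
    // size-class allocator. Batch-class blocks are still cached.
    static const bool EnableCache = false;
    // ... remaining required parameters (RegionSizeLog, GroupSizeLog,
    // CompactPtrT, release-to-OS intervals, ...) as in an existing config ...
  };
  template <typename Config>
  using PrimaryT = scudo::SizeClassAllocator64<Config>;
  // Secondary and TSDRegistry sections unchanged from an existing config.
};
```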
@llvmbot
Member

llvmbot commented Mar 4, 2025

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Christopher Ferris (cferris1000)

Changes

When caching is disabled via this option, no primary blocks are cached except for the batch class. Nothing else changes; in particular, the page-releasing strategy is unaffected.


Patch is 57.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129794.diff

12 Files Affected:

  • (modified) compiler-rt/lib/scudo/standalone/CMakeLists.txt (+1-1)
  • (modified) compiler-rt/lib/scudo/standalone/allocator_config.def (+4)
  • (modified) compiler-rt/lib/scudo/standalone/combined.h (+27-20)
  • (modified) compiler-rt/lib/scudo/standalone/primary32.h (+43-34)
  • (modified) compiler-rt/lib/scudo/standalone/primary64.h (+60-50)
  • (renamed) compiler-rt/lib/scudo/standalone/size_class_allocator.h (+149-12)
  • (modified) compiler-rt/lib/scudo/standalone/tests/combined_test.cpp (+49-7)
  • (modified) compiler-rt/lib/scudo/standalone/tests/primary_test.cpp (+41-35)
  • (modified) compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp (+15-12)
  • (modified) compiler-rt/lib/scudo/standalone/tsd.h (+6-3)
  • (modified) compiler-rt/lib/scudo/standalone/tsd_shared.h (+1-1)
  • (modified) compiler-rt/lib/scudo/standalone/type_traits.h (+8)
diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
index 3f9ae866a7553..db494a9a74a3f 100644
--- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
@@ -74,7 +74,6 @@ set(SCUDO_HEADERS
   internal_defs.h
   linux.h
   list.h
-  local_cache.h
   memtag.h
   mem_map.h
   mem_map_base.h
@@ -90,6 +89,7 @@ set(SCUDO_HEADERS
   report.h
   report_linux.h
   secondary.h
+  size_class_allocator.h
   size_class_map.h
   stack_depot.h
   stats.h
diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.def b/compiler-rt/lib/scudo/standalone/allocator_config.def
index 43893e9af3cf3..a79fe246fb481 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_config.def
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.def
@@ -81,6 +81,10 @@ PRIMARY_REQUIRED(const s32, MaxReleaseToOsIntervalMs)
 
 // PRIMARY_OPTIONAL(TYPE, NAME, DEFAULT)
 //
+
+// Enables/disables primary block caching. Batch class still caches.
+PRIMARY_OPTIONAL(const bool, EnableCache, true)
+
 // The scale of a compact pointer. E.g., Ptr = Base + (CompactPtr << Scale).
 PRIMARY_OPTIONAL(const uptr, CompactPtrScale, SCUDO_MIN_ALIGNMENT_LOG)
 
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index 5deb8c97f1c86..43655642843cb 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -15,7 +15,6 @@
 #include "common.h"
 #include "flags.h"
 #include "flags_parser.h"
-#include "local_cache.h"
 #include "mem_map.h"
 #include "memtag.h"
 #include "mutex.h"
@@ -23,6 +22,7 @@
 #include "quarantine.h"
 #include "report.h"
 #include "secondary.h"
+#include "size_class_allocator.h"
 #include "stack_depot.h"
 #include "string_utils.h"
 #include "tsd.h"
@@ -54,7 +54,7 @@ class Allocator {
       typename AllocatorConfig::template PrimaryT<PrimaryConfig<Config>>;
   using SecondaryT =
       typename AllocatorConfig::template SecondaryT<SecondaryConfig<Config>>;
-  using CacheT = typename PrimaryT::CacheT;
+  using SizeClassAllocatorT = typename PrimaryT::SizeClassAllocatorT;
   typedef Allocator<Config, PostInitCallback> ThisT;
   typedef typename AllocatorConfig::template TSDRegistryT<ThisT> TSDRegistryT;
 
@@ -63,8 +63,9 @@ class Allocator {
   }
 
   struct QuarantineCallback {
-    explicit QuarantineCallback(ThisT &Instance, CacheT &LocalCache)
-        : Allocator(Instance), Cache(LocalCache) {}
+    explicit QuarantineCallback(ThisT &Instance,
+                                SizeClassAllocatorT &SizeClassAllocator)
+        : Allocator(Instance), SizeClassAllocator(SizeClassAllocator) {}
 
     // Chunk recycling function, returns a quarantined chunk to the backend,
     // first making sure it hasn't been tampered with.
@@ -80,7 +81,7 @@ class Allocator {
       if (allocatorSupportsMemoryTagging<AllocatorConfig>())
         Ptr = untagPointer(Ptr);
       void *BlockBegin = Allocator::getBlockBegin(Ptr, &Header);
-      Cache.deallocate(Header.ClassId, BlockBegin);
+      SizeClassAllocator.deallocate(Header.ClassId, BlockBegin);
     }
 
     // We take a shortcut when allocating a quarantine batch by working with the
@@ -89,7 +90,7 @@ class Allocator {
     void *allocate(UNUSED uptr Size) {
       const uptr QuarantineClassId = SizeClassMap::getClassIdBySize(
           sizeof(QuarantineBatch) + Chunk::getHeaderSize());
-      void *Ptr = Cache.allocate(QuarantineClassId);
+      void *Ptr = SizeClassAllocator.allocate(QuarantineClassId);
       // Quarantine batch allocation failure is fatal.
       if (UNLIKELY(!Ptr))
         reportOutOfMemory(SizeClassMap::getSizeByClassId(QuarantineClassId));
@@ -126,14 +127,15 @@ class Allocator {
 
       Header.State = Chunk::State::Available;
       Chunk::storeHeader(Allocator.Cookie, Ptr, &Header);
-      Cache.deallocate(QuarantineClassId,
-                       reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) -
-                                                Chunk::getHeaderSize()));
+      SizeClassAllocator.deallocate(
+          QuarantineClassId,
+          reinterpret_cast<void *>(reinterpret_cast<uptr>(Ptr) -
+                                   Chunk::getHeaderSize()));
     }
 
   private:
     ThisT &Allocator;
-    CacheT &Cache;
+    SizeClassAllocatorT &SizeClassAllocator;
   };
 
   typedef GlobalQuarantine<QuarantineCallback, void> QuarantineT;
@@ -263,7 +265,9 @@ class Allocator {
   QuarantineT *getQuarantine() { return &Quarantine; }
 
   // The Cache must be provided zero-initialized.
-  void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); }
+  void initAllocator(SizeClassAllocatorT *SizeClassAllocator) {
+    SizeClassAllocator->init(&Stats, &Primary);
+  }
 
   // Release the resources used by a TSD, which involves:
   // - draining the local quarantine cache to the global quarantine;
@@ -273,15 +277,16 @@ class Allocator {
   void commitBack(TSD<ThisT> *TSD) {
     TSD->assertLocked(/*BypassCheck=*/true);
     Quarantine.drain(&TSD->getQuarantineCache(),
-                     QuarantineCallback(*this, TSD->getCache()));
-    TSD->getCache().destroy(&Stats);
+                     QuarantineCallback(*this, TSD->getSizeClassAllocator()));
+    TSD->getSizeClassAllocator().destroy(&Stats);
   }
 
   void drainCache(TSD<ThisT> *TSD) {
     TSD->assertLocked(/*BypassCheck=*/true);
-    Quarantine.drainAndRecycle(&TSD->getQuarantineCache(),
-                               QuarantineCallback(*this, TSD->getCache()));
-    TSD->getCache().drain();
+    Quarantine.drainAndRecycle(
+        &TSD->getQuarantineCache(),
+        QuarantineCallback(*this, TSD->getSizeClassAllocator()));
+    TSD->getSizeClassAllocator().drain();
   }
   void drainCaches() { TSDRegistry.drainCaches(this); }
 
@@ -390,13 +395,13 @@ class Allocator {
       ClassId = SizeClassMap::getClassIdBySize(NeededSize);
       DCHECK_NE(ClassId, 0U);
       typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
-      Block = TSD->getCache().allocate(ClassId);
+      Block = TSD->getSizeClassAllocator().allocate(ClassId);
       // If the allocation failed, retry in each successively larger class until
       // it fits. If it fails to fit in the largest class, fallback to the
       // Secondary.
       if (UNLIKELY(!Block)) {
         while (ClassId < SizeClassMap::LargestClassId && !Block)
-          Block = TSD->getCache().allocate(++ClassId);
+          Block = TSD->getSizeClassAllocator().allocate(++ClassId);
         if (!Block)
           ClassId = 0;
       }
@@ -1280,7 +1285,8 @@ class Allocator {
         bool CacheDrained;
         {
           typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
-          CacheDrained = TSD->getCache().deallocate(ClassId, BlockBegin);
+          CacheDrained =
+              TSD->getSizeClassAllocator().deallocate(ClassId, BlockBegin);
         }
         // When we have drained some blocks back to the Primary from TSD, that
         // implies that we may have the chance to release some pages as well.
@@ -1296,7 +1302,8 @@ class Allocator {
         retagBlock(Options, TaggedPtr, Ptr, Header, Size, false);
       typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
       Quarantine.put(&TSD->getQuarantineCache(),
-                     QuarantineCallback(*this, TSD->getCache()), Ptr, Size);
+                     QuarantineCallback(*this, TSD->getSizeClassAllocator()),
+                     Ptr, Size);
     }
   }
 
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 596c48f004b22..8e443951c2f17 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -13,10 +13,10 @@
 #include "bytemap.h"
 #include "common.h"
 #include "list.h"
-#include "local_cache.h"
 #include "options.h"
 #include "release.h"
 #include "report.h"
+#include "size_class_allocator.h"
 #include "stats.h"
 #include "string_utils.h"
 #include "thread_annotations.h"
@@ -52,7 +52,10 @@ template <typename Config> class SizeClassAllocator32 {
   static_assert((1UL << Config::getRegionSizeLog()) >= SizeClassMap::MaxSize,
                 "");
   typedef SizeClassAllocator32<Config> ThisT;
-  typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
+  using SizeClassAllocatorT =
+      typename Conditional<Config::getEnableCache(),
+                           SizeClassAllocatorLocalCache<ThisT>,
+                           SizeClassAllocatorNoCache<ThisT>>::type;
   typedef TransferBatch<ThisT> TransferBatchT;
   typedef BatchGroup<ThisT> BatchGroupT;
 
@@ -191,17 +194,19 @@ template <typename Config> class SizeClassAllocator32 {
     return BlockSize > PageSize;
   }
 
-  u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
-                const u16 MaxBlockCount) {
+  u16 popBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
+                CompactPtrT *ToArray, const u16 MaxBlockCount) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
     ScopedLock L(Sci->Mutex);
 
-    u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
+    u16 PopCount =
+        popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray, MaxBlockCount);
     if (UNLIKELY(PopCount == 0)) {
-      if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
+      if (UNLIKELY(!populateFreeList(SizeClassAllocator, ClassId, Sci)))
         return 0U;
-      PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
+      PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray,
+                               MaxBlockCount);
       DCHECK_NE(PopCount, 0U);
     }
 
@@ -209,7 +214,8 @@ template <typename Config> class SizeClassAllocator32 {
   }
 
   // Push the array of free blocks to the designated batch group.
-  void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
+  void pushBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
+                  CompactPtrT *Array, u32 Size) {
     DCHECK_LT(ClassId, NumClasses);
     DCHECK_GT(Size, 0);
 
@@ -240,7 +246,7 @@ template <typename Config> class SizeClassAllocator32 {
     }
 
     ScopedLock L(Sci->Mutex);
-    pushBlocksImpl(C, ClassId, Sci, Array, Size, SameGroup);
+    pushBlocksImpl(SizeClassAllocator, ClassId, Sci, Array, Size, SameGroup);
   }
 
   void disable() NO_THREAD_SAFETY_ANALYSIS {
@@ -529,8 +535,8 @@ template <typename Config> class SizeClassAllocator32 {
       // memory group here.
       BG->CompactPtrGroupBase = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch =
-          CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
+      BG->MaxCachedPerBatch = SizeClassAllocatorT::getMaxCached(
+          getSizeByClassId(SizeClassMap::BatchClassId));
 
       Sci->FreeListInfo.BlockList.push_front(BG);
     }
@@ -597,18 +603,18 @@ template <typename Config> class SizeClassAllocator32 {
   // same group then we will skip checking the group id of each block.
   //
   // The region mutex needs to be held while calling this method.
-  void pushBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
-                      CompactPtrT *Array, u32 Size, bool SameGroup = false)
-      REQUIRES(Sci->Mutex) {
+  void pushBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
+                      SizeClassInfo *Sci, CompactPtrT *Array, u32 Size,
+                      bool SameGroup = false) REQUIRES(Sci->Mutex) {
     DCHECK_NE(ClassId, SizeClassMap::BatchClassId);
     DCHECK_GT(Size, 0U);
 
     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
-      BatchGroupT *BG =
-          reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock());
+      BatchGroupT *BG = reinterpret_cast<BatchGroupT *>(
+          SizeClassAllocator->getBatchClassBlock());
       BG->Batches.clear();
-      TransferBatchT *TB =
-          reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
+      TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
+          SizeClassAllocator->getBatchClassBlock());
       TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
@@ -629,8 +635,8 @@ template <typename Config> class SizeClassAllocator32 {
         u16 UnusedSlots =
             static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
         if (UnusedSlots == 0) {
-          CurBatch =
-              reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
+          CurBatch = reinterpret_cast<TransferBatchT *>(
+              SizeClassAllocator->getBatchClassBlock());
           CurBatch->clear();
           Batches.push_front(CurBatch);
           UnusedSlots = BG->MaxCachedPerBatch;
@@ -704,9 +710,9 @@ template <typename Config> class SizeClassAllocator32 {
     InsertBlocks(Cur, Array + Size - Count, Count);
   }
 
-  u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
-                    CompactPtrT *ToArray, const u16 MaxBlockCount)
-      REQUIRES(Sci->Mutex) {
+  u16 popBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
+                    SizeClassInfo *Sci, CompactPtrT *ToArray,
+                    const u16 MaxBlockCount) REQUIRES(Sci->Mutex) {
     if (Sci->FreeListInfo.BlockList.empty())
       return 0U;
 
@@ -730,11 +736,11 @@ template <typename Config> class SizeClassAllocator32 {
     // So far, instead of always filling the blocks to `MaxBlockCount`, we only
     // examine single `TransferBatch` to minimize the time spent on the primary
     // allocator. Besides, the sizes of `TransferBatch` and
-    // `CacheT::getMaxCached()` may also impact the time spent on accessing the
-    // primary allocator.
+    // `SizeClassAllocatorT::getMaxCached()` may also impact the time spent on
+    // accessing the primary allocator.
     // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
     // blocks and/or adjust the size of `TransferBatch` according to
-    // `CacheT::getMaxCached()`.
+    // `SizeClassAllocatorT::getMaxCached()`.
     TransferBatchT *B = Batches.front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
@@ -754,7 +760,7 @@ template <typename Config> class SizeClassAllocator32 {
       // deallocate. Read the comment in `pushBatchClassBlocks()` for more
       // details.
       if (ClassId != SizeClassMap::BatchClassId)
-        C->deallocate(SizeClassMap::BatchClassId, B);
+        SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, B);
 
       if (Batches.empty()) {
         BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
@@ -766,7 +772,7 @@ template <typename Config> class SizeClassAllocator32 {
         // Which means, once we pop the last TransferBatch, the block is
         // implicitly deallocated.
         if (ClassId != SizeClassMap::BatchClassId)
-          C->deallocate(SizeClassMap::BatchClassId, BG);
+          SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, BG);
       }
     }
 
@@ -774,7 +780,8 @@ template <typename Config> class SizeClassAllocator32 {
     return PopCount;
   }
 
-  NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
+  NOINLINE bool populateFreeList(SizeClassAllocatorT *SizeClassAllocator,
+                                 uptr ClassId, SizeClassInfo *Sci)
       REQUIRES(Sci->Mutex) {
     uptr Region;
     uptr Offset;
@@ -791,13 +798,13 @@ template <typename Config> class SizeClassAllocator32 {
       Region = allocateRegion(Sci, ClassId);
       if (UNLIKELY(!Region))
         return false;
-      C->getStats().add(StatMapped, RegionSize);
+      SizeClassAllocator->getStats().add(StatMapped, RegionSize);
       Sci->CurrentRegion = Region;
       Offset = 0;
     }
 
     const uptr Size = getSizeByClassId(ClassId);
-    const u16 MaxCount = CacheT::getMaxCached(Size);
+    const u16 MaxCount = SizeClassAllocatorT::getMaxCached(Size);
     DCHECK_GT(MaxCount, 0U);
     // The maximum number of blocks we should carve in the region is dictated
     // by the maximum number of batches we want to fill, and the amount of
@@ -827,7 +834,8 @@ template <typename Config> class SizeClassAllocator32 {
       for (u32 I = 1; I < NumberOfBlocks; I++) {
         if (UNLIKELY(compactPtrGroupBase(ShuffleArray[I]) != CurGroup)) {
           shuffle(ShuffleArray + I - N, N, &Sci->RandState);
-          pushBlocksImpl(C, ClassId, Sci, ShuffleArray + I - N, N,
+          pushBlocksImpl(SizeClassAllocator, ClassId, Sci, ShuffleArray + I - N,
+                         N,
                          /*SameGroup=*/true);
           N = 1;
           CurGroup = compactPtrGroupBase(ShuffleArray[I]);
@@ -837,7 +845,8 @@ template <typename Config> class SizeClassAllocator32 {
       }
 
       shuffle(ShuffleArray + NumberOfBlocks - N, N, &Sci->RandState);
-      pushBlocksImpl(C, ClassId, Sci, &ShuffleArray[NumberOfBlocks - N], N,
+      pushBlocksImpl(SizeClassAllocator, ClassId, Sci,
+                     &ShuffleArray[NumberOfBlocks - N], N,
                      /*SameGroup=*/true);
     } else {
       pushBatchClassBlocks(Sci, ShuffleArray, NumberOfBlocks);
@@ -850,7 +859,7 @@ template <typename Config> class SizeClassAllocator32 {
     Sci->FreeListInfo.PushedBlocks -= NumberOfBlocks;
 
     const uptr AllocatedUser = Size * NumberOfBlocks;
-    C->getStats().add(StatFree, AllocatedUser);
+    SizeClassAllocator->getStats().add(StatFree, AllocatedUser);
     DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize);
     // If there is not enough room in the region currently associated to fit
     // more blocks, we deassociate the region by resetting CurrentRegion and
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index e7da849c339bf..e0b2d9975062e 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -14,11 +14,11 @@
 #include "common.h"
 #include "condition_variable.h"
 #include "list.h"
-#include "local_cache.h"
 #include "mem_map.h"
 #include "memtag.h"
 #include "options.h"
 #include "release.h"
+#include "size_class_allocator.h"
 #include "stats.h"
 #include "string_utils.h"
 #include "thread_annotations.h"
@@ -57,9 +57,12 @@ template <typename Config> class SizeClassAllocator64 {
                 "Group size shouldn't be greater than the region size");
   static const uptr GroupScale = GroupSizeLog - CompactPtrScale;
   typedef SizeClassAllocator64<Config> ThisT;
-  typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef TransferBatch<ThisT> TransferBatchT;
   typedef BatchGroup<ThisT> BatchGroupT;
+  using SizeClassAllocatorT =
+      typename Conditional<Config::getEnableCache(),
+                           SizeClassAllocatorLocalCache<ThisT>,
+                           SizeClassAllocatorNoCache<ThisT>>::type;
 
   // BachClass is used to store internal metadata so it needs to be at least as
   // large as the largest data structure.
@@ -215,15 +218,16 @@ template <typename Config> class SizeClassAllocator64 {
     DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
   }
 
-  u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
-                const u16 MaxBlockCount) {
+  u16 popBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId,
+                CompactPtrT *ToArray, const u16 MaxBlockCount) {
     DCHECK_LT(ClassId, NumClasses);
     RegionInfo *Region = getRegionInfo(ClassId);
     u16 PopCount = 0;
 
     {
       ScopedLock L(Region->FLLock);
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
+      PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray,
+                               MaxBlockCount);
       if (PopCount != 0U)
         return PopCount;
     }
@@ -231,8 +235,8 @@ template <typename Config> class SizeClassAllocator64 {
     bool ReportRegionExhausted = false;
 
     if (conditionVariableEnabled()) {
-      PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount,
-                                 ReportRegionExhausted);
+      PopCount = popBlocksWithCV(SizeClassAllocator, ClassId, Region, ToArray,
+                                 MaxBlockCount, ReportRegionExhausted);
     } else {
       while (true) {
         // When two threads compete for `Region->MMLock`, we only want one of
@@ -241,15 +245,16 @@ template <typename Config> class SizeClassAllocator64 {...
[truncated]
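The type_traits.h addition (+8 lines) falls in the truncated tail of the diff. Judging from its use in primary32.h and primary64.h above, the new Conditional trait behaves like std::conditional. Here is a standalone sketch of that compile-time selection with toy stand-ins for the two per-thread allocator types; everything other than the Conditional usage pattern is an assumption, not the patch's code.

```cpp
#include <cstdio>

// Assumed shape of the Conditional trait added in type_traits.h; it mirrors
// std::conditional, which is all the primary headers need from it.
template <bool B, typename T, typename F> struct Conditional {
  using type = T;
};
template <typename T, typename F> struct Conditional<false, T, F> {
  using type = F;
};

// Toy stand-ins for SizeClassAllocatorLocalCache / SizeClassAllocatorNoCache,
// only to show how the selection reads; the real classes live in
// size_class_allocator.h.
struct LocalCacheStub {
  static const char *name() { return "caching per-thread allocator"; }
};
struct NoCacheStub {
  static const char *name() { return "pass-through per-thread allocator"; }
};

template <bool EnableCache> struct PrimaryStub {
  using SizeClassAllocatorT =
      typename Conditional<EnableCache, LocalCacheStub, NoCacheStub>::type;
};

int main() {
  std::printf("EnableCache=true  -> %s\n",
              PrimaryStub<true>::SizeClassAllocatorT::name());
  std::printf("EnableCache=false -> %s\n",
              PrimaryStub<false>::SizeClassAllocatorT::name());
  return 0;
}
```

Because the choice is made with a type alias on a compile-time config constant, only the selected class is ever used by the TSDs; there is no runtime branch.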

@ajordanr-google
Contributor

I see this, and I should have some time tomorrow to take a look and get back to you.

At first glance, is this related to the Scudo thread-local caches, so that it gives the option to have no locally cached blocks? It was unclear from the commit message and the option description.

@cferris1000
Contributor Author

Yes, that is approximately what this is trying to do. I kept trying to come up with a better config name, because we aren't really turning off all caching, just most of it in the primary.
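To make that concrete: with caching enabled, the per-thread allocator batches blocks locally and only calls into the primary when a size class's local cache fills up or runs dry; with it disabled, every allocation and deallocation outside the batch class goes straight to the primary. Below is a conceptual sketch of the pass-through path written against the popBlocks/pushBlocks interface visible in the diff; it is not the code in size_class_allocator.h (which is truncated above), and the compactPtr/decompactPtr helpers are assumed from the existing primary interface.

```cpp
// Conceptual only: what a pass-through allocate/deallocate looks like in
// terms of the primary's popBlocks/pushBlocks. The real
// SizeClassAllocatorNoCache also handles the batch class and statistics,
// and the primary expects its own SizeClassAllocatorT as the first argument.
template <class PrimaryT> struct PassThroughSketch {
  using CompactPtrT = typename PrimaryT::CompactPtrT;
  PrimaryT *Primary = nullptr;

  void *allocate(scudo::uptr ClassId) {
    CompactPtrT CompactP;
    // Ask the primary for exactly one block; nothing is retained locally.
    if (Primary->popBlocks(this, ClassId, &CompactP, /*MaxBlockCount=*/1U) == 0U)
      return nullptr;
    return reinterpret_cast<void *>(Primary->decompactPtr(ClassId, CompactP));
  }

  void deallocate(scudo::uptr ClassId, void *Ptr) {
    CompactPtrT CompactP =
        Primary->compactPtr(ClassId, reinterpret_cast<scudo::uptr>(Ptr));
    // Hand the single block straight back to the primary.
    Primary->pushBlocks(this, ClassId, &CompactP, /*Size=*/1U);
  }
};
```

Per the description above, the actual no-cache allocator still keeps batch-class blocks around internally, which is why the option does not turn off all caching.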

Contributor

@ajordanr-google ajordanr-google left a comment


Conceptually, this all makes sense to me.

The exact algorithmic details of the Batch Allocate/Deallocate are not super clear to me, but I do understand why it's useful.

The motivation of the whole PR has not quite hit me yet. I assume it's for lower memory usage during multi-threaded execution, but it's not clear from the option that this is the intent.

Otherwise my remaining comments are nits/LLVM style guide things.

@cferris1000
Contributor Author

I've pushed a new version. I didn't resolve any comments yet since it becomes harder to track things.

@ajordanr-google ajordanr-google self-requested a review April 3, 2025 18:16
@ajordanr-google
Contributor

Sorry about the delay here; didn't get a re-request ping. I'll be able to take a look tomorrow.

@cferris1000
Contributor Author

Sorry about the delay here; didn't get a re-request ping. I'll be able to take a look tomorrow.

No worries, it's my fault because I took so long to respond. I'll make sure to be faster on the replies from now on.

Contributor

@ajordanr-google ajordanr-google left a comment


Sorry for the delay, thanks for the change!

@cferris1000 cferris1000 merged commit 1756fcb into llvm:main Apr 17, 2025
10 checks passed
@cferris1000 cferris1000 deleted the no_cache branch April 17, 2025 21:23
@llvm-ci
Collaborator

llvm-ci commented Apr 17, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-gcc-ubuntu running on sie-linux-worker3 while building compiler-rt at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/16379

Here is the relevant piece of the build log for reference:
Step 6 (test-build-unified-tree-check-all) failure: 1200 seconds without output running [b'ninja', b'check-all'], attempting to kill
...
PASS: mlgo-utils :: corpus/make_corpus_test.py (90263 of 90273)
PASS: mlgo-utils :: corpus/combine_training_corpus_test.py (90264 of 90273)
PASS: mlgo-utils :: corpus/extract_ir_script.test (90265 of 90273)
PASS: lld :: wasm/whole-archive.test (90266 of 90273)
PASS: lld :: wasm/why-extract.s (90267 of 90273)
PASS: lld :: MachO/loh-adrp-add.s (90268 of 90273)
PASS: mlgo-utils :: corpus/extract_ir_test.py (90269 of 90273)
PASS: lld :: MinGW/driver.test (90270 of 90273)
PASS: libFuzzer-x86_64-default-Linux :: fork_corpus_groups.test (90271 of 90273)
PASS: libFuzzer-x86_64-default-Linux :: fork.test (90272 of 90273)
command timed out: 1200 seconds without output running [b'ninja', b'check-all'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=1857.722467

IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
When configured this way, no primary blocks will be cached except the
batch class. Nothing else changes, no change in the page releasing
strategy.