diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index 756adcadd0c7b..e2de50b93adc3 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -76,315 +76,64 @@ template class SizeClassAllocator32 { static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { - if (SCUDO_FUCHSIA) - reportError("SizeClassAllocator32 is not supported on Fuchsia"); - - if (SCUDO_TRUSTY) - reportError("SizeClassAllocator32 is not supported on Trusty"); - - DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); - PossibleRegions.init(); - u32 Seed; - const u64 Time = getMonotonicTimeFast(); - if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed))) - Seed = static_cast( - Time ^ (reinterpret_cast(SizeClassInfoArray) >> 6)); - for (uptr I = 0; I < NumClasses; I++) { - SizeClassInfo *Sci = getSizeClassInfo(I); - Sci->RandState = getRandomU32(&Seed); - // Sci->MaxRegionIndex is already initialized to 0. - Sci->MinRegionIndex = NumRegions; - Sci->ReleaseInfo.LastReleaseAtNs = Time; - } - - // The default value in the primary config has the higher priority. - if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) - ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); - setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); - } - - void unmapTestOnly() { - { - ScopedLock L(RegionsStashMutex); - while (NumberOfStashedRegions > 0) { - unmap(reinterpret_cast(RegionsStash[--NumberOfStashedRegions]), - RegionSize); - } - } - - uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; - for (uptr I = 0; I < NumClasses; I++) { - SizeClassInfo *Sci = getSizeClassInfo(I); - ScopedLock L(Sci->Mutex); - if (Sci->MinRegionIndex < MinRegionIndex) - MinRegionIndex = Sci->MinRegionIndex; - if (Sci->MaxRegionIndex > MaxRegionIndex) - MaxRegionIndex = Sci->MaxRegionIndex; - *Sci = {}; - } + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS; - ScopedLock L(ByteMapMutex); - for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) - if (PossibleRegions[I]) - unmap(reinterpret_cast(I * RegionSize), RegionSize); - PossibleRegions.unmapTestOnly(); - } + void unmapTestOnly(); // When all blocks are freed, it has to be the same size as `AllocatedUser`. - void verifyAllBlocksAreReleasedTestOnly() { - // `BatchGroup` and `Batch` also use the blocks from BatchClass. - uptr BatchClassUsedInFreeLists = 0; - for (uptr I = 0; I < NumClasses; I++) { - // We have to count BatchClassUsedInFreeLists in other regions first. - if (I == SizeClassMap::BatchClassId) - continue; - SizeClassInfo *Sci = getSizeClassInfo(I); - ScopedLock L1(Sci->Mutex); - uptr TotalBlocks = 0; - for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { - // `BG::Batches` are `Batches`. +1 for `BatchGroup`. 
- BatchClassUsedInFreeLists += BG.Batches.size() + 1; - for (const auto &It : BG.Batches) - TotalBlocks += It.getCount(); - } - - const uptr BlockSize = getSizeByClassId(I); - DCHECK_EQ(TotalBlocks, Sci->AllocatedUser / BlockSize); - DCHECK_EQ(Sci->FreeListInfo.PushedBlocks, Sci->FreeListInfo.PoppedBlocks); - } - - SizeClassInfo *Sci = getSizeClassInfo(SizeClassMap::BatchClassId); - ScopedLock L1(Sci->Mutex); - uptr TotalBlocks = 0; - for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { - if (LIKELY(!BG.Batches.empty())) { - for (const auto &It : BG.Batches) - TotalBlocks += It.getCount(); - } else { - // `BatchGroup` with empty freelist doesn't have `Batch` record - // itself. - ++TotalBlocks; - } - } - - const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); - DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, - Sci->AllocatedUser / BlockSize); - const uptr BlocksInUse = - Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; - DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); - } + void verifyAllBlocksAreReleasedTestOnly(); CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const { return static_cast(Ptr); } - void *decompactPtr(UNUSED uptr ClassId, CompactPtrT CompactPtr) const { return reinterpret_cast(static_cast(CompactPtr)); } - uptr compactPtrGroupBase(CompactPtrT CompactPtr) { const uptr Mask = (static_cast(1) << GroupSizeLog) - 1; return CompactPtr & ~Mask; } - uptr decompactGroupBase(uptr CompactPtrGroupBase) { return CompactPtrGroupBase; } - - ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + ALWAYS_INLINE bool isSmallBlock(uptr BlockSize) { const uptr PageSize = getPageSizeCached(); return BlockSize < PageSize / 16U; } - - ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) { + ALWAYS_INLINE bool isLargeBlock(uptr BlockSize) { const uptr PageSize = getPageSizeCached(); return BlockSize > PageSize; } u16 popBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - CompactPtrT *ToArray, const u16 MaxBlockCount) { - DCHECK_LT(ClassId, NumClasses); - SizeClassInfo *Sci = getSizeClassInfo(ClassId); - ScopedLock L(Sci->Mutex); - - u16 PopCount = - popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray, MaxBlockCount); - if (UNLIKELY(PopCount == 0)) { - if (UNLIKELY(!populateFreeList(SizeClassAllocator, ClassId, Sci))) - return 0U; - PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray, - MaxBlockCount); - DCHECK_NE(PopCount, 0U); - } - - return PopCount; - } + CompactPtrT *ToArray, const u16 MaxBlockCount); // Push the array of free blocks to the designated batch group. void pushBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - CompactPtrT *Array, u32 Size) { - DCHECK_LT(ClassId, NumClasses); - DCHECK_GT(Size, 0); + CompactPtrT *Array, u32 Size); - SizeClassInfo *Sci = getSizeClassInfo(ClassId); - if (ClassId == SizeClassMap::BatchClassId) { - ScopedLock L(Sci->Mutex); - pushBatchClassBlocks(Sci, Array, Size); - return; - } + void disable() NO_THREAD_SAFETY_ANALYSIS; + void enable() NO_THREAD_SAFETY_ANALYSIS; - // TODO(chiahungduan): Consider not doing grouping if the group size is not - // greater than the block size with a certain scale. - - // Sort the blocks so that blocks belonging to the same group can be pushed - // together. 
- bool SameGroup = true; - for (u32 I = 1; I < Size; ++I) { - if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) - SameGroup = false; - CompactPtrT Cur = Array[I]; - u32 J = I; - while (J > 0 && - compactPtrGroupBase(Cur) < compactPtrGroupBase(Array[J - 1])) { - Array[J] = Array[J - 1]; - --J; - } - Array[J] = Cur; - } - - ScopedLock L(Sci->Mutex); - pushBlocksImpl(SizeClassAllocator, ClassId, Sci, Array, Size, SameGroup); - } - - void disable() NO_THREAD_SAFETY_ANALYSIS { - // The BatchClassId must be locked last since other classes can use it. - for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { - if (static_cast(I) == SizeClassMap::BatchClassId) - continue; - getSizeClassInfo(static_cast(I))->Mutex.lock(); - } - getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.lock(); - RegionsStashMutex.lock(); - ByteMapMutex.lock(); - } - - void enable() NO_THREAD_SAFETY_ANALYSIS { - ByteMapMutex.unlock(); - RegionsStashMutex.unlock(); - getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); - for (uptr I = 0; I < NumClasses; I++) { - if (I == SizeClassMap::BatchClassId) - continue; - getSizeClassInfo(I)->Mutex.unlock(); - } - } - - template void iterateOverBlocks(F Callback) { - uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; - for (uptr I = 0; I < NumClasses; I++) { - SizeClassInfo *Sci = getSizeClassInfo(I); - // TODO: The call of `iterateOverBlocks` requires disabling - // SizeClassAllocator32. We may consider locking each region on demand - // only. - Sci->Mutex.assertHeld(); - if (Sci->MinRegionIndex < MinRegionIndex) - MinRegionIndex = Sci->MinRegionIndex; - if (Sci->MaxRegionIndex > MaxRegionIndex) - MaxRegionIndex = Sci->MaxRegionIndex; - } - - // SizeClassAllocator32 is disabled, i.e., ByteMapMutex is held. - ByteMapMutex.assertHeld(); - - for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { - if (PossibleRegions[I] && - (PossibleRegions[I] - 1U) != SizeClassMap::BatchClassId) { - const uptr BlockSize = getSizeByClassId(PossibleRegions[I] - 1U); - const uptr From = I * RegionSize; - const uptr To = From + (RegionSize / BlockSize) * BlockSize; - for (uptr Block = From; Block < To; Block += BlockSize) - Callback(Block); - } - } - } - - void getStats(ScopedString *Str) { - // TODO(kostyak): get the RSS per region. - uptr TotalMapped = 0; - uptr PoppedBlocks = 0; - uptr PushedBlocks = 0; - for (uptr I = 0; I < NumClasses; I++) { - SizeClassInfo *Sci = getSizeClassInfo(I); - ScopedLock L(Sci->Mutex); - TotalMapped += Sci->AllocatedUser; - PoppedBlocks += Sci->FreeListInfo.PoppedBlocks; - PushedBlocks += Sci->FreeListInfo.PushedBlocks; - } - Str->append("Stats: SizeClassAllocator32: %zuM mapped in %zu allocations; " - "remains %zu\n", - TotalMapped >> 20, PoppedBlocks, PoppedBlocks - PushedBlocks); - for (uptr I = 0; I < NumClasses; I++) { - SizeClassInfo *Sci = getSizeClassInfo(I); - ScopedLock L(Sci->Mutex); - getStats(Str, I, Sci); - } - } - - void getFragmentationInfo(ScopedString *Str) { - Str->append( - "Fragmentation Stats: SizeClassAllocator32: page size = %zu bytes\n", - getPageSizeCached()); - - for (uptr I = 1; I < NumClasses; I++) { - SizeClassInfo *Sci = getSizeClassInfo(I); - ScopedLock L(Sci->Mutex); - getSizeClassFragmentationInfo(Sci, I, Str); - } - } + template void iterateOverBlocks(F Callback); + void getStats(ScopedString *Str); + void getFragmentationInfo(ScopedString *Str); void getMemoryGroupFragmentationInfo(ScopedString *Str) { // Each region is also a memory group because region size is the same as // group size. 
getFragmentationInfo(Str); } - bool setOption(Option O, sptr Value) { - if (O == Option::ReleaseInterval) { - const s32 Interval = Max( - Min(static_cast(Value), Config::getMaxReleaseToOsIntervalMs()), - Config::getMinReleaseToOsIntervalMs()); - atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); - return true; - } - // Not supported by the Primary, but not an error either. - return true; - } + bool setOption(Option O, sptr Value); - uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) { - SizeClassInfo *Sci = getSizeClassInfo(ClassId); - // TODO: Once we have separate locks like primary64, we may consider using - // tryLock() as well. - ScopedLock L(Sci->Mutex); - return releaseToOSMaybe(Sci, ClassId, ReleaseType); - } - - uptr releaseToOS(ReleaseToOS ReleaseType) { - uptr TotalReleasedBytes = 0; - for (uptr I = 0; I < NumClasses; I++) { - if (I == SizeClassMap::BatchClassId) - continue; - SizeClassInfo *Sci = getSizeClassInfo(I); - ScopedLock L(Sci->Mutex); - TotalReleasedBytes += releaseToOSMaybe(Sci, I, ReleaseType); - } - return TotalReleasedBytes; - } + uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType); + uptr releaseToOS(ReleaseToOS ReleaseType); const char *getRegionInfoArrayAddress() const { return nullptr; } static uptr getRegionInfoArraySize() { return 0; } + // Not supported in SizeClassAllocator32. static BlockInfo findNearestBlock(UNUSED const char *RegionInfoData, UNUSED uptr Ptr) { return {}; @@ -434,755 +183,1088 @@ template class SizeClassAllocator32 { return Id; } - uptr allocateRegionSlow() { - uptr MapSize = 2 * RegionSize; - const uptr MapBase = reinterpret_cast( - map(nullptr, MapSize, "scudo:primary", MAP_ALLOWNOMEM)); - if (!MapBase) - return 0; - const uptr MapEnd = MapBase + MapSize; - uptr Region = MapBase; - if (isAligned(Region, RegionSize)) { - ScopedLock L(RegionsStashMutex); - if (NumberOfStashedRegions < MaxStashedRegions) - RegionsStash[NumberOfStashedRegions++] = MapBase + RegionSize; - else - MapSize = RegionSize; + uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) REQUIRES(Sci->Mutex); + uptr allocateRegionSlow(); + + SizeClassInfo *getSizeClassInfo(uptr ClassId) { + DCHECK_LT(ClassId, NumClasses); + return &SizeClassInfoArray[ClassId]; + } + + void pushBatchClassBlocks(SizeClassInfo *Sci, CompactPtrT *Array, u32 Size) + REQUIRES(Sci->Mutex); + + void pushBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, + SizeClassInfo *Sci, CompactPtrT *Array, u32 Size, + bool SameGroup = false) REQUIRES(Sci->Mutex); + u16 popBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, + SizeClassInfo *Sci, CompactPtrT *ToArray, + const u16 MaxBlockCount) REQUIRES(Sci->Mutex); + NOINLINE bool populateFreeList(SizeClassAllocatorT *SizeClassAllocator, + uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex); + + void getStats(ScopedString *Str, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex); + void getSizeClassFragmentationInfo(SizeClassInfo *Sci, uptr ClassId, + ScopedString *Str) REQUIRES(Sci->Mutex); + + NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Sci->Mutex); + bool hasChanceToReleasePages(SizeClassInfo *Sci, uptr BlockSize, + uptr BytesInFreeList, ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex); + PageReleaseContext markFreeBlocks(SizeClassInfo *Sci, const uptr ClassId, + const uptr BlockSize, const uptr Base, + const uptr NumberOfRegions, + ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex); + + SizeClassInfo 
SizeClassInfoArray[NumClasses] = {}; + HybridMutex ByteMapMutex; + // Track the regions in use, 0 is unused, otherwise store ClassId + 1. + ByteMap PossibleRegions GUARDED_BY(ByteMapMutex) = {}; + atomic_s32 ReleaseToOsIntervalMs = {}; + // Unless several threads request regions simultaneously from different size + // classes, the stash rarely contains more than 1 entry. + static constexpr uptr MaxStashedRegions = 4; + HybridMutex RegionsStashMutex; + uptr NumberOfStashedRegions GUARDED_BY(RegionsStashMutex) = 0; + uptr RegionsStash[MaxStashedRegions] GUARDED_BY(RegionsStashMutex) = {}; +}; + +template +void SizeClassAllocator32::init(s32 ReleaseToOsInterval) + NO_THREAD_SAFETY_ANALYSIS { + if (SCUDO_FUCHSIA) + reportError("SizeClassAllocator32 is not supported on Fuchsia"); + + if (SCUDO_TRUSTY) + reportError("SizeClassAllocator32 is not supported on Trusty"); + + DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); + PossibleRegions.init(); + u32 Seed; + const u64 Time = getMonotonicTimeFast(); + if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed))) + Seed = static_cast(Time ^ + (reinterpret_cast(SizeClassInfoArray) >> 6)); + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + Sci->RandState = getRandomU32(&Seed); + // Sci->MaxRegionIndex is already initialized to 0. + Sci->MinRegionIndex = NumRegions; + Sci->ReleaseInfo.LastReleaseAtNs = Time; + } + + // The default value in the primary config has the higher priority. + if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) + ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); + setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); +} + +template void SizeClassAllocator32::unmapTestOnly() { + { + ScopedLock L(RegionsStashMutex); + while (NumberOfStashedRegions > 0) { + unmap(reinterpret_cast(RegionsStash[--NumberOfStashedRegions]), + RegionSize); + } + } + + uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + if (Sci->MinRegionIndex < MinRegionIndex) + MinRegionIndex = Sci->MinRegionIndex; + if (Sci->MaxRegionIndex > MaxRegionIndex) + MaxRegionIndex = Sci->MaxRegionIndex; + *Sci = {}; + } + + ScopedLock L(ByteMapMutex); + for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) + if (PossibleRegions[I]) + unmap(reinterpret_cast(I * RegionSize), RegionSize); + PossibleRegions.unmapTestOnly(); +} + +template +void SizeClassAllocator32::verifyAllBlocksAreReleasedTestOnly() { + // `BatchGroup` and `Batch` also use the blocks from BatchClass. + uptr BatchClassUsedInFreeLists = 0; + for (uptr I = 0; I < NumClasses; I++) { + // We have to count BatchClassUsedInFreeLists in other regions first. + if (I == SizeClassMap::BatchClassId) + continue; + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L1(Sci->Mutex); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + // `BG::Batches` are `Batches`. +1 for `BatchGroup`. 
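+      // That is, each `Batch` in `BG.Batches` and the `BatchGroup` header
+      // itself each occupy one block from BatchClass.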
+ BatchClassUsedInFreeLists += BG.Batches.size() + 1; + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } + + const uptr BlockSize = getSizeByClassId(I); + DCHECK_EQ(TotalBlocks, Sci->AllocatedUser / BlockSize); + DCHECK_EQ(Sci->FreeListInfo.PushedBlocks, Sci->FreeListInfo.PoppedBlocks); + } + + SizeClassInfo *Sci = getSizeClassInfo(SizeClassMap::BatchClassId); + ScopedLock L1(Sci->Mutex); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + if (LIKELY(!BG.Batches.empty())) { + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); } else { - Region = roundUp(MapBase, RegionSize); - unmap(reinterpret_cast(MapBase), Region - MapBase); - MapSize = RegionSize; + // `BatchGroup` with empty freelist doesn't have `Batch` record + // itself. + ++TotalBlocks; } - const uptr End = Region + MapSize; - if (End != MapEnd) - unmap(reinterpret_cast(End), MapEnd - End); + } + + const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); + DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, + Sci->AllocatedUser / BlockSize); + const uptr BlocksInUse = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); +} + +template +u16 SizeClassAllocator32::popBlocks( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, CompactPtrT *ToArray, + const u16 MaxBlockCount) { + DCHECK_LT(ClassId, NumClasses); + SizeClassInfo *Sci = getSizeClassInfo(ClassId); + ScopedLock L(Sci->Mutex); + + u16 PopCount = + popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray, MaxBlockCount); + if (UNLIKELY(PopCount == 0)) { + if (UNLIKELY(!populateFreeList(SizeClassAllocator, ClassId, Sci))) + return 0U; + PopCount = + popBlocksImpl(SizeClassAllocator, ClassId, Sci, ToArray, MaxBlockCount); + DCHECK_NE(PopCount, 0U); + } + + return PopCount; +} - DCHECK_EQ(Region % RegionSize, 0U); - static_assert(Config::getRegionSizeLog() == GroupSizeLog, - "Memory group should be the same size as Region"); +template +void SizeClassAllocator32::pushBlocks( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, CompactPtrT *Array, + u32 Size) { + DCHECK_LT(ClassId, NumClasses); + DCHECK_GT(Size, 0); - return Region; + SizeClassInfo *Sci = getSizeClassInfo(ClassId); + if (ClassId == SizeClassMap::BatchClassId) { + ScopedLock L(Sci->Mutex); + pushBatchClassBlocks(Sci, Array, Size); + return; } - uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) REQUIRES(Sci->Mutex) { - DCHECK_LT(ClassId, NumClasses); - uptr Region = 0; - { - ScopedLock L(RegionsStashMutex); - if (NumberOfStashedRegions > 0) - Region = RegionsStash[--NumberOfStashedRegions]; + // TODO(chiahungduan): Consider not doing grouping if the group size is not + // greater than the block size with a certain scale. + + // Sort the blocks so that blocks belonging to the same group can be pushed + // together. + bool SameGroup = true; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) + SameGroup = false; + CompactPtrT Cur = Array[I]; + u32 J = I; + while (J > 0 && + compactPtrGroupBase(Cur) < compactPtrGroupBase(Array[J - 1])) { + Array[J] = Array[J - 1]; + --J; } - if (!Region) - Region = allocateRegionSlow(); - if (LIKELY(Region)) { - // Sci->Mutex is held by the caller, updating the Min/Max is safe. 
- const uptr RegionIndex = computeRegionId(Region); - if (RegionIndex < Sci->MinRegionIndex) - Sci->MinRegionIndex = RegionIndex; - if (RegionIndex > Sci->MaxRegionIndex) - Sci->MaxRegionIndex = RegionIndex; - ScopedLock L(ByteMapMutex); - PossibleRegions.set(RegionIndex, static_cast(ClassId + 1U)); + Array[J] = Cur; + } + + ScopedLock L(Sci->Mutex); + pushBlocksImpl(SizeClassAllocator, ClassId, Sci, Array, Size, SameGroup); +} + +template +void SizeClassAllocator32::disable() NO_THREAD_SAFETY_ANALYSIS { + // The BatchClassId must be locked last since other classes can use it. + for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { + if (static_cast(I) == SizeClassMap::BatchClassId) + continue; + getSizeClassInfo(static_cast(I))->Mutex.lock(); + } + getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.lock(); + RegionsStashMutex.lock(); + ByteMapMutex.lock(); +} + +template +void SizeClassAllocator32::enable() NO_THREAD_SAFETY_ANALYSIS { + ByteMapMutex.unlock(); + RegionsStashMutex.unlock(); + getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); + for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; + getSizeClassInfo(I)->Mutex.unlock(); + } +} + +template +template +void SizeClassAllocator32::iterateOverBlocks(F Callback) { + uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + // TODO: The call of `iterateOverBlocks` requires disabling + // SizeClassAllocator32. We may consider locking each region on demand + // only. + Sci->Mutex.assertHeld(); + if (Sci->MinRegionIndex < MinRegionIndex) + MinRegionIndex = Sci->MinRegionIndex; + if (Sci->MaxRegionIndex > MaxRegionIndex) + MaxRegionIndex = Sci->MaxRegionIndex; + } + + // SizeClassAllocator32 is disabled, i.e., ByteMapMutex is held. + ByteMapMutex.assertHeld(); + + for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { + if (PossibleRegions[I] && + (PossibleRegions[I] - 1U) != SizeClassMap::BatchClassId) { + const uptr BlockSize = getSizeByClassId(PossibleRegions[I] - 1U); + const uptr From = I * RegionSize; + const uptr To = From + (RegionSize / BlockSize) * BlockSize; + for (uptr Block = From; Block < To; Block += BlockSize) + Callback(Block); } - return Region; } +} + +template +void SizeClassAllocator32::getStats(ScopedString *Str) { + // TODO(kostyak): get the RSS per region. 
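+  // The first loop accumulates the totals for the summary line; the second
+  // loop below prints the per-class breakdown.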
+ uptr TotalMapped = 0; + uptr PoppedBlocks = 0; + uptr PushedBlocks = 0; + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + TotalMapped += Sci->AllocatedUser; + PoppedBlocks += Sci->FreeListInfo.PoppedBlocks; + PushedBlocks += Sci->FreeListInfo.PushedBlocks; + } + Str->append("Stats: SizeClassAllocator32: %zuM mapped in %zu allocations; " + "remains %zu\n", + TotalMapped >> 20, PoppedBlocks, PoppedBlocks - PushedBlocks); + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + getStats(Str, I, Sci); + } +} - SizeClassInfo *getSizeClassInfo(uptr ClassId) { - DCHECK_LT(ClassId, NumClasses); - return &SizeClassInfoArray[ClassId]; +template +void SizeClassAllocator32::getFragmentationInfo(ScopedString *Str) { + Str->append( + "Fragmentation Stats: SizeClassAllocator32: page size = %zu bytes\n", + getPageSizeCached()); + + for (uptr I = 1; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + getSizeClassFragmentationInfo(Sci, I, Str); + } +} + +template +bool SizeClassAllocator32::setOption(Option O, sptr Value) { + if (O == Option::ReleaseInterval) { + const s32 Interval = + Max(Min(static_cast(Value), Config::getMaxReleaseToOsIntervalMs()), + Config::getMinReleaseToOsIntervalMs()); + atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); + return true; + } + // Not supported by the Primary, but not an error either. + return true; +} + +template +uptr SizeClassAllocator32::tryReleaseToOS(uptr ClassId, + ReleaseToOS ReleaseType) { + SizeClassInfo *Sci = getSizeClassInfo(ClassId); + // TODO: Once we have separate locks like primary64, we may consider using + // tryLock() as well. + ScopedLock L(Sci->Mutex); + return releaseToOSMaybe(Sci, ClassId, ReleaseType); +} + +template +uptr SizeClassAllocator32::releaseToOS(ReleaseToOS ReleaseType) { + uptr TotalReleasedBytes = 0; + for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + TotalReleasedBytes += releaseToOSMaybe(Sci, I, ReleaseType); + } + return TotalReleasedBytes; +} + +template +uptr SizeClassAllocator32::allocateRegion(SizeClassInfo *Sci, + uptr ClassId) + REQUIRES(Sci->Mutex) { + DCHECK_LT(ClassId, NumClasses); + uptr Region = 0; + { + ScopedLock L(RegionsStashMutex); + if (NumberOfStashedRegions > 0) + Region = RegionsStash[--NumberOfStashedRegions]; + } + if (!Region) + Region = allocateRegionSlow(); + if (LIKELY(Region)) { + // Sci->Mutex is held by the caller, updating the Min/Max is safe. 
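+    // PossibleRegions, in contrast, is guarded by ByteMapMutex, which is
+    // taken right before the update below.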
+ const uptr RegionIndex = computeRegionId(Region); + if (RegionIndex < Sci->MinRegionIndex) + Sci->MinRegionIndex = RegionIndex; + if (RegionIndex > Sci->MaxRegionIndex) + Sci->MaxRegionIndex = RegionIndex; + ScopedLock L(ByteMapMutex); + PossibleRegions.set(RegionIndex, static_cast(ClassId + 1U)); + } + return Region; +} + +template +uptr SizeClassAllocator32::allocateRegionSlow() { + uptr MapSize = 2 * RegionSize; + const uptr MapBase = reinterpret_cast( + map(nullptr, MapSize, "scudo:primary", MAP_ALLOWNOMEM)); + if (!MapBase) + return 0; + const uptr MapEnd = MapBase + MapSize; + uptr Region = MapBase; + if (isAligned(Region, RegionSize)) { + ScopedLock L(RegionsStashMutex); + if (NumberOfStashedRegions < MaxStashedRegions) + RegionsStash[NumberOfStashedRegions++] = MapBase + RegionSize; + else + MapSize = RegionSize; + } else { + Region = roundUp(MapBase, RegionSize); + unmap(reinterpret_cast(MapBase), Region - MapBase); + MapSize = RegionSize; + } + const uptr End = Region + MapSize; + if (End != MapEnd) + unmap(reinterpret_cast(End), MapEnd - End); + + DCHECK_EQ(Region % RegionSize, 0U); + static_assert(Config::getRegionSizeLog() == GroupSizeLog, + "Memory group should be the same size as Region"); + + return Region; +} + +template +void SizeClassAllocator32::pushBatchClassBlocks(SizeClassInfo *Sci, + CompactPtrT *Array, + u32 Size) + REQUIRES(Sci->Mutex) { + DCHECK_EQ(Sci, getSizeClassInfo(SizeClassMap::BatchClassId)); + + // Free blocks are recorded by Batch in freelist for all + // size-classes. In addition, Batch is allocated from BatchClassId. + // In order not to use additional block to record the free blocks in + // BatchClassId, they are self-contained. I.e., A Batch records the + // block address of itself. See the figure below: + // + // Batch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... | | + // | +------+------+------+ | + // +----------------------------+ + // + // When we allocate all the free blocks in the Batch, the block used + // by Batch is also free for use. We don't need to recycle the + // Batch. Note that the correctness is maintained by the invariant, + // + // Each popBlocks() request returns the entire Batch. Returning + // part of the blocks in a Batch is invalid. + // + // This ensures that Batch won't leak the address itself while it's + // still holding other valid data. + // + // Besides, BatchGroup is also allocated from BatchClassId and has its + // address recorded in the Batch too. To maintain the correctness, + // + // The address of BatchGroup is always recorded in the last Batch + // in the freelist (also imply that the freelist should only be + // updated with push_front). Once the last Batch is popped, + // the block used by BatchGroup is also free for use. + // + // With this approach, the blocks used by BatchGroup and Batch are + // reusable and don't need additional space for them. + + Sci->FreeListInfo.PushedBlocks += Size; + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + + if (BG == nullptr) { + // Construct `BatchGroup` on the last element. + BG = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + --Size; + BG->Batches.clear(); + // BatchClass hasn't enabled memory group. Use `0` to indicate there's no + // memory group here. 
+ BG->CompactPtrGroupBase = 0; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = SizeClassAllocatorT::getMaxCached( + getSizeByClassId(SizeClassMap::BatchClassId)); + + Sci->FreeListInfo.BlockList.push_front(BG); } - void pushBatchClassBlocks(SizeClassInfo *Sci, CompactPtrT *Array, u32 Size) - REQUIRES(Sci->Mutex) { - DCHECK_EQ(Sci, getSizeClassInfo(SizeClassMap::BatchClassId)); - - // Free blocks are recorded by Batch in freelist for all - // size-classes. In addition, Batch is allocated from BatchClassId. - // In order not to use additional block to record the free blocks in - // BatchClassId, they are self-contained. I.e., A Batch records the - // block address of itself. See the figure below: - // - // Batch at 0xABCD - // +----------------------------+ - // | Free blocks' addr | - // | +------+------+------+ | - // | |0xABCD|... |... | | - // | +------+------+------+ | - // +----------------------------+ - // - // When we allocate all the free blocks in the Batch, the block used - // by Batch is also free for use. We don't need to recycle the - // Batch. Note that the correctness is maintained by the invariant, - // - // Each popBlocks() request returns the entire Batch. Returning - // part of the blocks in a Batch is invalid. - // - // This ensures that Batch won't leak the address itself while it's - // still holding other valid data. - // - // Besides, BatchGroup is also allocated from BatchClassId and has its - // address recorded in the Batch too. To maintain the correctness, - // - // The address of BatchGroup is always recorded in the last Batch - // in the freelist (also imply that the freelist should only be - // updated with push_front). Once the last Batch is popped, - // the block used by BatchGroup is also free for use. - // - // With this approach, the blocks used by BatchGroup and Batch are - // reusable and don't need additional space for them. - - Sci->FreeListInfo.PushedBlocks += Size; - BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + if (UNLIKELY(Size == 0)) + return; + + // This happens under 2 cases. + // 1. just allocated a new `BatchGroup`. + // 2. Only 1 block is pushed when the freelist is empty. + if (BG->Batches.empty()) { + // Construct the `Batch` on the last element. + BatchT *TB = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + TB->clear(); + // As mentioned above, addresses of `Batch` and `BatchGroup` are + // recorded in the Batch. + TB->add(Array[Size - 1]); + TB->add(compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(BG))); + --Size; + BG->Batches.push_front(TB); + } - if (BG == nullptr) { - // Construct `BatchGroup` on the last element. - BG = reinterpret_cast( - decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); - --Size; - BG->Batches.clear(); - // BatchClass hasn't enabled memory group. Use `0` to indicate there's no - // memory group here. 
- BG->CompactPtrGroupBase = 0; - BG->BytesInBGAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = SizeClassAllocatorT::getMaxCached( - getSizeByClassId(SizeClassMap::BatchClassId)); - - Sci->FreeListInfo.BlockList.push_front(BG); + BatchT *CurBatch = BG->Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + u16 UnusedSlots = + static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[I])); + CurBatch->clear(); + // Self-contained + CurBatch->add(Array[I]); + ++I; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of + // BatchClassId. + BG->Batches.push_front(CurBatch); + UnusedSlots = static_cast(BG->MaxCachedPerBatch - 1); } + // `UnusedSlots` is u16 so the result will be also fit in u16. + const u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } +} - if (UNLIKELY(Size == 0)) - return; - - // This happens under 2 cases. - // 1. just allocated a new `BatchGroup`. - // 2. Only 1 block is pushed when the freelist is empty. - if (BG->Batches.empty()) { - // Construct the `Batch` on the last element. - BatchT *TB = reinterpret_cast( - decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); - TB->clear(); - // As mentioned above, addresses of `Batch` and `BatchGroup` are - // recorded in the Batch. - TB->add(Array[Size - 1]); - TB->add( - compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(BG))); - --Size; - BG->Batches.push_front(TB); - } +// Push the blocks to their batch group. The layout will be like, +// +// FreeListInfo.BlockList - > BG -> BG -> BG +// | | | +// v v v +// TB TB TB +// | +// v +// TB +// +// Each BlockGroup(BG) will associate with unique group id and the free blocks +// are managed by a list of Batch(TB). To reduce the time of inserting blocks, +// BGs are sorted and the input `Array` are supposed to be sorted so that we can +// get better performance of maintaining sorted property. Use `SameGroup=true` +// to indicate that all blocks in the array are from the same group then we will +// skip checking the group id of each block. +// +// The region mutex needs to be held while calling this method. 
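+//
+// For illustration: pushing a sorted array whose blocks span groups G1 and G2
+// appends the G1 run to G1's BatchGroup (creating it first if absent), then
+// advances along the sorted BlockList and does the same for the G2 run.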
+template +void SizeClassAllocator32::pushBlocksImpl( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, SizeClassInfo *Sci, + CompactPtrT *Array, u32 Size, bool SameGroup) REQUIRES(Sci->Mutex) { + DCHECK_NE(ClassId, SizeClassMap::BatchClassId); + DCHECK_GT(Size, 0U); + + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroupT *BG = reinterpret_cast( + SizeClassAllocator->getBatchClassBlock()); + BG->Batches.clear(); + BatchT *TB = + reinterpret_cast(SizeClassAllocator->getBatchClassBlock()); + TB->clear(); + + BG->CompactPtrGroupBase = CompactPtrGroupBase; + BG->Batches.push_front(TB); + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = MaxNumBlocksInBatch; + + return BG; + }; - BatchT *CurBatch = BG->Batches.front(); + auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList &Batches = BG->Batches; + BatchT *CurBatch = Batches.front(); DCHECK_NE(CurBatch, nullptr); for (u32 I = 0; I < Size;) { + DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); u16 UnusedSlots = static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); if (UnusedSlots == 0) { CurBatch = reinterpret_cast( - decompactPtr(SizeClassMap::BatchClassId, Array[I])); + SizeClassAllocator->getBatchClassBlock()); CurBatch->clear(); - // Self-contained - CurBatch->add(Array[I]); - ++I; - // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of - // BatchClassId. - BG->Batches.push_front(CurBatch); - UnusedSlots = static_cast(BG->MaxCachedPerBatch - 1); + Batches.push_front(CurBatch); + UnusedSlots = BG->MaxCachedPerBatch; } // `UnusedSlots` is u16 so the result will be also fit in u16. - const u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); CurBatch->appendFromArray(&Array[I], AppendSize); I += AppendSize; } + }; + + Sci->FreeListInfo.PushedBlocks += Size; + BatchGroupT *Cur = Sci->FreeListInfo.BlockList.front(); + + // In the following, `Cur` always points to the BatchGroup for blocks that + // will be pushed next. `Prev` is the element right before `Cur`. + BatchGroupT *Prev = nullptr; + + while (Cur != nullptr && + compactPtrGroupBase(Array[0]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; } - // Push the blocks to their batch group. The layout will be like, - // - // FreeListInfo.BlockList - > BG -> BG -> BG - // | | | - // v v v - // TB TB TB - // | - // v - // TB - // - // Each BlockGroup(BG) will associate with unique group id and the free blocks - // are managed by a list of Batch(TB). To reduce the time of inserting - // blocks, BGs are sorted and the input `Array` are supposed to be sorted so - // that we can get better performance of maintaining sorted property. - // Use `SameGroup=true` to indicate that all blocks in the array are from the - // same group then we will skip checking the group id of each block. - // - // The region mutex needs to be held while calling this method. 
- void pushBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - SizeClassInfo *Sci, CompactPtrT *Array, u32 Size, - bool SameGroup = false) REQUIRES(Sci->Mutex) { - DCHECK_NE(ClassId, SizeClassMap::BatchClassId); - DCHECK_GT(Size, 0U); - - auto CreateGroup = [&](uptr CompactPtrGroupBase) { - BatchGroupT *BG = reinterpret_cast( - SizeClassAllocator->getBatchClassBlock()); - BG->Batches.clear(); - BatchT *TB = - reinterpret_cast(SizeClassAllocator->getBatchClassBlock()); - TB->clear(); - - BG->CompactPtrGroupBase = CompactPtrGroupBase; - BG->Batches.push_front(TB); - BG->BytesInBGAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = MaxNumBlocksInBatch; - - return BG; - }; - - auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { - SinglyLinkedList &Batches = BG->Batches; - BatchT *CurBatch = Batches.front(); - DCHECK_NE(CurBatch, nullptr); - - for (u32 I = 0; I < Size;) { - DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); - u16 UnusedSlots = - static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); - if (UnusedSlots == 0) { - CurBatch = reinterpret_cast( - SizeClassAllocator->getBatchClassBlock()); - CurBatch->clear(); - Batches.push_front(CurBatch); - UnusedSlots = BG->MaxCachedPerBatch; - } - // `UnusedSlots` is u16 so the result will be also fit in u16. - u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); - CurBatch->appendFromArray(&Array[I], AppendSize); - I += AppendSize; - } - }; - Sci->FreeListInfo.PushedBlocks += Size; - BatchGroupT *Cur = Sci->FreeListInfo.BlockList.front(); + if (Cur == nullptr || + compactPtrGroupBase(Array[0]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[0])); + if (Prev == nullptr) + Sci->FreeListInfo.BlockList.push_front(Cur); + else + Sci->FreeListInfo.BlockList.insert(Prev, Cur); + } - // In the following, `Cur` always points to the BatchGroup for blocks that - // will be pushed next. `Prev` is the element right before `Cur`. - BatchGroupT *Prev = nullptr; + // All the blocks are from the same group, just push without checking group + // id. + if (SameGroup) { + for (u32 I = 0; I < Size; ++I) + DCHECK_EQ(compactPtrGroupBase(Array[I]), Cur->CompactPtrGroupBase); - while (Cur != nullptr && - compactPtrGroupBase(Array[0]) > Cur->CompactPtrGroupBase) { - Prev = Cur; - Cur = Cur->Next; - } + InsertBlocks(Cur, Array, Size); + return; + } + + // The blocks are sorted by group id. Determine the segment of group and + // push them to their group together. + u32 Count = 1; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) { + DCHECK_EQ(compactPtrGroupBase(Array[I - 1]), Cur->CompactPtrGroupBase); + InsertBlocks(Cur, Array + I - Count, Count); + + while (Cur != nullptr && + compactPtrGroupBase(Array[I]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } - if (Cur == nullptr || - compactPtrGroupBase(Array[0]) != Cur->CompactPtrGroupBase) { - Cur = CreateGroup(compactPtrGroupBase(Array[0])); - if (Prev == nullptr) - Sci->FreeListInfo.BlockList.push_front(Cur); - else + if (Cur == nullptr || + compactPtrGroupBase(Array[I]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[I])); + DCHECK_NE(Prev, nullptr); Sci->FreeListInfo.BlockList.insert(Prev, Cur); + } + + Count = 1; + } else { + ++Count; } + } - // All the blocks are from the same group, just push without checking group - // id. 
- if (SameGroup) { - for (u32 I = 0; I < Size; ++I) - DCHECK_EQ(compactPtrGroupBase(Array[I]), Cur->CompactPtrGroupBase); + InsertBlocks(Cur, Array + Size - Count, Count); +} - InsertBlocks(Cur, Array, Size); - return; - } +template +u16 SizeClassAllocator32::popBlocksImpl( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, SizeClassInfo *Sci, + CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Sci->Mutex) { + if (Sci->FreeListInfo.BlockList.empty()) + return 0U; - // The blocks are sorted by group id. Determine the segment of group and - // push them to their group together. - u32 Count = 1; - for (u32 I = 1; I < Size; ++I) { - if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) { - DCHECK_EQ(compactPtrGroupBase(Array[I - 1]), Cur->CompactPtrGroupBase); - InsertBlocks(Cur, Array + I - Count, Count); - - while (Cur != nullptr && - compactPtrGroupBase(Array[I]) > Cur->CompactPtrGroupBase) { - Prev = Cur; - Cur = Cur->Next; - } - - if (Cur == nullptr || - compactPtrGroupBase(Array[I]) != Cur->CompactPtrGroupBase) { - Cur = CreateGroup(compactPtrGroupBase(Array[I])); - DCHECK_NE(Prev, nullptr); - Sci->FreeListInfo.BlockList.insert(Prev, Cur); - } - - Count = 1; - } else { - ++Count; - } - } + SinglyLinkedList &Batches = + Sci->FreeListInfo.BlockList.front()->Batches; - InsertBlocks(Cur, Array + Size - Count, Count); + if (Batches.empty()) { + DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + Sci->FreeListInfo.BlockList.pop_front(); + + // Block used by `BatchGroup` is from BatchClassId. Turn the block into + // `Batch` with single block. + BatchT *TB = reinterpret_cast(BG); + ToArray[0] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(TB)); + Sci->FreeListInfo.PoppedBlocks += 1; + return 1U; } - u16 popBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - SizeClassInfo *Sci, CompactPtrT *ToArray, - const u16 MaxBlockCount) REQUIRES(Sci->Mutex) { - if (Sci->FreeListInfo.BlockList.empty()) - return 0U; - - SinglyLinkedList &Batches = - Sci->FreeListInfo.BlockList.front()->Batches; + // So far, instead of always filling the blocks to `MaxBlockCount`, we only + // examine single `Batch` to minimize the time spent on the primary + // allocator. Besides, the sizes of `Batch` and + // `SizeClassAllocatorT::getMaxCached()` may also impact the time spent on + // accessing the primary allocator. + // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` + // blocks and/or adjust the size of `Batch` according to + // `SizeClassAllocatorT::getMaxCached()`. + BatchT *B = Batches.front(); + DCHECK_NE(B, nullptr); + DCHECK_GT(B->getCount(), 0U); + + // BachClassId should always take all blocks in the Batch. Read the + // comment in `pushBatchClassBlocks()` for more details. + const u16 PopCount = ClassId == SizeClassMap::BatchClassId + ? B->getCount() + : Min(MaxBlockCount, B->getCount()); + B->moveNToArray(ToArray, PopCount); + + // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be + // done without holding `Mutex`. + if (B->empty()) { + Batches.pop_front(); + // `Batch` of BatchClassId is self-contained, no need to + // deallocate. Read the comment in `pushBatchClassBlocks()` for more + // details. 
+ if (ClassId != SizeClassMap::BatchClassId) + SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, B); if (Batches.empty()) { - DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); Sci->FreeListInfo.BlockList.pop_front(); - // Block used by `BatchGroup` is from BatchClassId. Turn the block into - // `Batch` with single block. - BatchT *TB = reinterpret_cast(BG); - ToArray[0] = - compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(TB)); - Sci->FreeListInfo.PoppedBlocks += 1; - return 1U; - } - - // So far, instead of always filling the blocks to `MaxBlockCount`, we only - // examine single `Batch` to minimize the time spent on the primary - // allocator. Besides, the sizes of `Batch` and - // `SizeClassAllocatorT::getMaxCached()` may also impact the time spent on - // accessing the primary allocator. - // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` - // blocks and/or adjust the size of `Batch` according to - // `SizeClassAllocatorT::getMaxCached()`. - BatchT *B = Batches.front(); - DCHECK_NE(B, nullptr); - DCHECK_GT(B->getCount(), 0U); - - // BachClassId should always take all blocks in the Batch. Read the - // comment in `pushBatchClassBlocks()` for more details. - const u16 PopCount = ClassId == SizeClassMap::BatchClassId - ? B->getCount() - : Min(MaxBlockCount, B->getCount()); - B->moveNToArray(ToArray, PopCount); - - // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be - // done without holding `Mutex`. - if (B->empty()) { - Batches.pop_front(); - // `Batch` of BatchClassId is self-contained, no need to - // deallocate. Read the comment in `pushBatchClassBlocks()` for more - // details. + // We don't keep BatchGroup with zero blocks to avoid empty-checking + // while allocating. Note that block used for constructing BatchGroup is + // recorded as free blocks in the last element of BatchGroup::Batches. + // Which means, once we pop the last Batch, the block is + // implicitly deallocated. if (ClassId != SizeClassMap::BatchClassId) - SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, B); - - if (Batches.empty()) { - BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); - Sci->FreeListInfo.BlockList.pop_front(); - - // We don't keep BatchGroup with zero blocks to avoid empty-checking - // while allocating. Note that block used for constructing BatchGroup is - // recorded as free blocks in the last element of BatchGroup::Batches. - // Which means, once we pop the last Batch, the block is - // implicitly deallocated. - if (ClassId != SizeClassMap::BatchClassId) - SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, BG); - } + SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, BG); } - - Sci->FreeListInfo.PoppedBlocks += PopCount; - return PopCount; } - NOINLINE bool populateFreeList(SizeClassAllocatorT *SizeClassAllocator, - uptr ClassId, SizeClassInfo *Sci) - REQUIRES(Sci->Mutex) { - uptr Region; - uptr Offset; - // If the size-class currently has a region associated to it, use it. The - // newly created blocks will be located after the currently allocated memory - // for that region (up to RegionSize). Otherwise, create a new region, where - // the new blocks will be carved from the beginning. 
- if (Sci->CurrentRegion) { - Region = Sci->CurrentRegion; - DCHECK_GT(Sci->CurrentRegionAllocated, 0U); - Offset = Sci->CurrentRegionAllocated; - } else { - DCHECK_EQ(Sci->CurrentRegionAllocated, 0U); - Region = allocateRegion(Sci, ClassId); - if (UNLIKELY(!Region)) - return false; - SizeClassAllocator->getStats().add(StatMapped, RegionSize); - Sci->CurrentRegion = Region; - Offset = 0; - } + Sci->FreeListInfo.PoppedBlocks += PopCount; + return PopCount; +} + +template +bool SizeClassAllocator32::populateFreeList( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { + uptr Region; + uptr Offset; + // If the size-class currently has a region associated to it, use it. The + // newly created blocks will be located after the currently allocated memory + // for that region (up to RegionSize). Otherwise, create a new region, where + // the new blocks will be carved from the beginning. + if (Sci->CurrentRegion) { + Region = Sci->CurrentRegion; + DCHECK_GT(Sci->CurrentRegionAllocated, 0U); + Offset = Sci->CurrentRegionAllocated; + } else { + DCHECK_EQ(Sci->CurrentRegionAllocated, 0U); + Region = allocateRegion(Sci, ClassId); + if (UNLIKELY(!Region)) + return false; + SizeClassAllocator->getStats().add(StatMapped, RegionSize); + Sci->CurrentRegion = Region; + Offset = 0; + } - const uptr Size = getSizeByClassId(ClassId); - const u16 MaxCount = SizeClassAllocatorT::getMaxCached(Size); - DCHECK_GT(MaxCount, 0U); - // The maximum number of blocks we should carve in the region is dictated - // by the maximum number of batches we want to fill, and the amount of - // memory left in the current region (we use the lowest of the two). This - // will not be 0 as we ensure that a region can at least hold one block (via - // static_assert and at the end of this function). - const u32 NumberOfBlocks = - Min(MaxNumBatches * MaxCount, - static_cast((RegionSize - Offset) / Size)); - DCHECK_GT(NumberOfBlocks, 0U); - - constexpr u32 ShuffleArraySize = MaxNumBatches * MaxNumBlocksInBatch; - // Fill the transfer batches and put them in the size-class freelist. We - // need to randomize the blocks for security purposes, so we first fill a - // local array that we then shuffle before populating the batches. - CompactPtrT ShuffleArray[ShuffleArraySize]; - DCHECK_LE(NumberOfBlocks, ShuffleArraySize); - - uptr P = Region + Offset; - for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) - ShuffleArray[I] = reinterpret_cast(P); - - if (ClassId != SizeClassMap::BatchClassId) { - u32 N = 1; - uptr CurGroup = compactPtrGroupBase(ShuffleArray[0]); - for (u32 I = 1; I < NumberOfBlocks; I++) { - if (UNLIKELY(compactPtrGroupBase(ShuffleArray[I]) != CurGroup)) { - shuffle(ShuffleArray + I - N, N, &Sci->RandState); - pushBlocksImpl(SizeClassAllocator, ClassId, Sci, ShuffleArray + I - N, - N, - /*SameGroup=*/true); - N = 1; - CurGroup = compactPtrGroupBase(ShuffleArray[I]); - } else { - ++N; - } + const uptr Size = getSizeByClassId(ClassId); + const u16 MaxCount = SizeClassAllocatorT::getMaxCached(Size); + DCHECK_GT(MaxCount, 0U); + // The maximum number of blocks we should carve in the region is dictated + // by the maximum number of batches we want to fill, and the amount of + // memory left in the current region (we use the lowest of the two). This + // will not be 0 as we ensure that a region can at least hold one block (via + // static_assert and at the end of this function). 
+ const u32 NumberOfBlocks = Min( + MaxNumBatches * MaxCount, static_cast((RegionSize - Offset) / Size)); + DCHECK_GT(NumberOfBlocks, 0U); + + constexpr u32 ShuffleArraySize = MaxNumBatches * MaxNumBlocksInBatch; + // Fill the transfer batches and put them in the size-class freelist. We + // need to randomize the blocks for security purposes, so we first fill a + // local array that we then shuffle before populating the batches. + CompactPtrT ShuffleArray[ShuffleArraySize]; + DCHECK_LE(NumberOfBlocks, ShuffleArraySize); + + uptr P = Region + Offset; + for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) + ShuffleArray[I] = reinterpret_cast(P); + + if (ClassId != SizeClassMap::BatchClassId) { + u32 N = 1; + uptr CurGroup = compactPtrGroupBase(ShuffleArray[0]); + for (u32 I = 1; I < NumberOfBlocks; I++) { + if (UNLIKELY(compactPtrGroupBase(ShuffleArray[I]) != CurGroup)) { + shuffle(ShuffleArray + I - N, N, &Sci->RandState); + pushBlocksImpl(SizeClassAllocator, ClassId, Sci, ShuffleArray + I - N, + N, + /*SameGroup=*/true); + N = 1; + CurGroup = compactPtrGroupBase(ShuffleArray[I]); + } else { + ++N; } - - shuffle(ShuffleArray + NumberOfBlocks - N, N, &Sci->RandState); - pushBlocksImpl(SizeClassAllocator, ClassId, Sci, - &ShuffleArray[NumberOfBlocks - N], N, - /*SameGroup=*/true); - } else { - pushBatchClassBlocks(Sci, ShuffleArray, NumberOfBlocks); } - // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record - // the requests from `PushBlocks` and `PopBatch` which are external - // interfaces. `populateFreeList` is the internal interface so we should set - // the values back to avoid incorrectly setting the stats. - Sci->FreeListInfo.PushedBlocks -= NumberOfBlocks; - - const uptr AllocatedUser = Size * NumberOfBlocks; - SizeClassAllocator->getStats().add(StatFree, AllocatedUser); - DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize); - // If there is not enough room in the region currently associated to fit - // more blocks, we deassociate the region by resetting CurrentRegion and - // CurrentRegionAllocated. Otherwise, update the allocated amount. - if (RegionSize - (Sci->CurrentRegionAllocated + AllocatedUser) < Size) { - Sci->CurrentRegion = 0; - Sci->CurrentRegionAllocated = 0; - } else { - Sci->CurrentRegionAllocated += AllocatedUser; - } - Sci->AllocatedUser += AllocatedUser; + shuffle(ShuffleArray + NumberOfBlocks - N, N, &Sci->RandState); + pushBlocksImpl(SizeClassAllocator, ClassId, Sci, + &ShuffleArray[NumberOfBlocks - N], N, + /*SameGroup=*/true); + } else { + pushBatchClassBlocks(Sci, ShuffleArray, NumberOfBlocks); + } - return true; + // Note that `pushedBlocks` and `poppedBlocks` are supposed to only record + // the requests from `pushBlocks` and `PopBatch` which are external + // interfaces. `populateFreeList` is the internal interface so we should set + // the values back to avoid incorrectly setting the stats. + Sci->FreeListInfo.PushedBlocks -= NumberOfBlocks; + + const uptr AllocatedUser = Size * NumberOfBlocks; + SizeClassAllocator->getStats().add(StatFree, AllocatedUser); + DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize); + // If there is not enough room in the region currently associated to fit + // more blocks, we deassociate the region by resetting CurrentRegion and + // CurrentRegionAllocated. Otherwise, update the allocated amount. 
+ if (RegionSize - (Sci->CurrentRegionAllocated + AllocatedUser) < Size) { + Sci->CurrentRegion = 0; + Sci->CurrentRegionAllocated = 0; + } else { + Sci->CurrentRegionAllocated += AllocatedUser; } + Sci->AllocatedUser += AllocatedUser; + + return true; +} + +template +void SizeClassAllocator32::getStats(ScopedString *Str, uptr ClassId, + SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { + if (Sci->AllocatedUser == 0) + return; + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr InUse = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + const uptr BytesInFreeList = Sci->AllocatedUser - InUse * BlockSize; + uptr PushedBytesDelta = 0; + if (BytesInFreeList >= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + PushedBytesDelta = + BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + } + const uptr AvailableChunks = Sci->AllocatedUser / BlockSize; + Str->append( + " %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " + "inuse: %6zu avail: %6zu releases attempted: %6zu last released: %6zuK " + "latest pushed bytes: %6zuK\n", + ClassId, getSizeByClassId(ClassId), Sci->AllocatedUser >> 10, + Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks, InUse, + AvailableChunks, Sci->ReleaseInfo.NumReleasesAttempted, + Sci->ReleaseInfo.LastReleasedBytes >> 10, PushedBytesDelta >> 10); +} + +template +void SizeClassAllocator32::getSizeClassFragmentationInfo( + SizeClassInfo *Sci, uptr ClassId, ScopedString *Str) REQUIRES(Sci->Mutex) { + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr First = Sci->MinRegionIndex; + const uptr Last = Sci->MaxRegionIndex; + const uptr Base = First * RegionSize; + const uptr NumberOfRegions = Last - First + 1U; + auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + ScopedLock L(ByteMapMutex); + return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; + }; - void getStats(ScopedString *Str, uptr ClassId, SizeClassInfo *Sci) - REQUIRES(Sci->Mutex) { - if (Sci->AllocatedUser == 0) - return; - const uptr BlockSize = getSizeByClassId(ClassId); - const uptr InUse = - Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; - const uptr BytesInFreeList = Sci->AllocatedUser - InUse * BlockSize; - uptr PushedBytesDelta = 0; - if (BytesInFreeList >= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { - PushedBytesDelta = - BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; - } - const uptr AvailableChunks = Sci->AllocatedUser / BlockSize; - Str->append( - " %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " - "inuse: %6zu avail: %6zu releases attempted: %6zu last released: %6zuK " - "latest pushed bytes: %6zuK\n", - ClassId, getSizeByClassId(ClassId), Sci->AllocatedUser >> 10, - Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks, InUse, - AvailableChunks, Sci->ReleaseInfo.NumReleasesAttempted, - Sci->ReleaseInfo.LastReleasedBytes >> 10, PushedBytesDelta >> 10); + FragmentationRecorder Recorder; + if (!Sci->FreeListInfo.BlockList.empty()) { + PageReleaseContext Context = markFreeBlocks( + Sci, ClassId, BlockSize, Base, NumberOfRegions, ReleaseToOS::ForceAll); + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); } - void getSizeClassFragmentationInfo(SizeClassInfo *Sci, uptr ClassId, - ScopedString *Str) REQUIRES(Sci->Mutex) { - const uptr BlockSize = getSizeByClassId(ClassId); - const uptr First = Sci->MinRegionIndex; - const uptr Last = Sci->MaxRegionIndex; - const uptr Base = First * RegionSize; - const uptr NumberOfRegions = Last - First + 1U; - auto 
SkipRegion = [this, First, ClassId](uptr RegionIndex) { - ScopedLock L(ByteMapMutex); - return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; - }; - - FragmentationRecorder Recorder; - if (!Sci->FreeListInfo.BlockList.empty()) { - PageReleaseContext Context = - markFreeBlocks(Sci, ClassId, BlockSize, Base, NumberOfRegions, - ReleaseToOS::ForceAll); - releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + const uptr PageSize = getPageSizeCached(); + const uptr TotalBlocks = Sci->AllocatedUser / BlockSize; + const uptr InUseBlocks = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + uptr AllocatedPagesCount = 0; + if (TotalBlocks != 0U) { + for (uptr I = 0; I < NumberOfRegions; ++I) { + if (SkipRegion(I)) + continue; + AllocatedPagesCount += RegionSize / PageSize; } - const uptr PageSize = getPageSizeCached(); - const uptr TotalBlocks = Sci->AllocatedUser / BlockSize; - const uptr InUseBlocks = - Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; - uptr AllocatedPagesCount = 0; - if (TotalBlocks != 0U) { - for (uptr I = 0; I < NumberOfRegions; ++I) { - if (SkipRegion(I)) - continue; - AllocatedPagesCount += RegionSize / PageSize; - } - - DCHECK_NE(AllocatedPagesCount, 0U); - } + DCHECK_NE(AllocatedPagesCount, 0U); + } - DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); - const uptr InUsePages = - AllocatedPagesCount - Recorder.getReleasedPagesCount(); - const uptr InUseBytes = InUsePages * PageSize; - - uptr Integral; - uptr Fractional; - computePercentage(BlockSize * InUseBlocks, InUseBytes, &Integral, - &Fractional); - Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " - "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", - ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, - AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); + DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); + const uptr InUsePages = + AllocatedPagesCount - Recorder.getReleasedPagesCount(); + const uptr InUseBytes = InUsePages * PageSize; + + uptr Integral; + uptr Fractional; + computePercentage(BlockSize * InUseBlocks, InUseBytes, &Integral, + &Fractional); + Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " + "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", + ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, + AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); +} + +template +uptr SizeClassAllocator32::releaseToOSMaybe(SizeClassInfo *Sci, + uptr ClassId, + ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex) { + const uptr BlockSize = getSizeByClassId(ClassId); + + DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); + const uptr BytesInFreeList = + Sci->AllocatedUser - + (Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks) * + BlockSize; + + if (UNLIKELY(BytesInFreeList == 0)) + return 0; + + // ====================================================================== // + // 1. Check if we have enough free blocks and if it's worth doing a page + // release. 
+ // ====================================================================== // + if (ReleaseType != ReleaseToOS::ForceAll && + !hasChanceToReleasePages(Sci, BlockSize, BytesInFreeList, ReleaseType)) { + return 0; } - NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, - ReleaseToOS ReleaseType = ReleaseToOS::Normal) - REQUIRES(Sci->Mutex) { - const uptr BlockSize = getSizeByClassId(ClassId); - - DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); - const uptr BytesInFreeList = - Sci->AllocatedUser - - (Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks) * - BlockSize; - - if (UNLIKELY(BytesInFreeList == 0)) - return 0; - - // ====================================================================== // - // 1. Check if we have enough free blocks and if it's worth doing a page - // release. - // ====================================================================== // - if (ReleaseType != ReleaseToOS::ForceAll && - !hasChanceToReleasePages(Sci, BlockSize, BytesInFreeList, - ReleaseType)) { - return 0; - } + const uptr First = Sci->MinRegionIndex; + const uptr Last = Sci->MaxRegionIndex; + DCHECK_NE(Last, 0U); + DCHECK_LE(First, Last); + uptr TotalReleasedBytes = 0; + const uptr Base = First * RegionSize; + const uptr NumberOfRegions = Last - First + 1U; + + // The following steps contribute to the majority time spent in page + // releasing thus we increment the counter here. + ++Sci->ReleaseInfo.NumReleasesAttempted; + + // ==================================================================== // + // 2. Mark the free blocks and we can tell which pages are in-use by + // querying `PageReleaseContext`. + // ==================================================================== // + PageReleaseContext Context = markFreeBlocks(Sci, ClassId, BlockSize, Base, + NumberOfRegions, ReleaseType); + if (!Context.hasBlockMarked()) + return 0; + + // ==================================================================== // + // 3. Release the unused physical pages back to the OS. + // ==================================================================== // + ReleaseRecorder Recorder(Base); + auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + ScopedLock L(ByteMapMutex); + return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; + }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); - const uptr First = Sci->MinRegionIndex; - const uptr Last = Sci->MaxRegionIndex; - DCHECK_NE(Last, 0U); - DCHECK_LE(First, Last); - uptr TotalReleasedBytes = 0; - const uptr Base = First * RegionSize; - const uptr NumberOfRegions = Last - First + 1U; - - // The following steps contribute to the majority time spent in page - // releasing thus we increment the counter here. - ++Sci->ReleaseInfo.NumReleasesAttempted; - - // ==================================================================== // - // 2. Mark the free blocks and we can tell which pages are in-use by - // querying `PageReleaseContext`. - // ==================================================================== // - PageReleaseContext Context = markFreeBlocks(Sci, ClassId, BlockSize, Base, - NumberOfRegions, ReleaseType); - if (!Context.hasBlockMarked()) - return 0; - - // ==================================================================== // - // 3. Release the unused physical pages back to the OS. 
- // ==================================================================== // - ReleaseRecorder Recorder(Base); - auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { - ScopedLock L(ByteMapMutex); - return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; - }; - releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + if (Recorder.getReleasedBytes() > 0) { + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; + } + Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); - if (Recorder.getReleasedBytes() > 0) { - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; - Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); - TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; - } - Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); + return TotalReleasedBytes; +} - return TotalReleasedBytes; - } +template +bool SizeClassAllocator32::hasChanceToReleasePages( + SizeClassInfo *Sci, uptr BlockSize, uptr BytesInFreeList, + ReleaseToOS ReleaseType) REQUIRES(Sci->Mutex) { + DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); + const uptr PageSize = getPageSizeCached(); - bool hasChanceToReleasePages(SizeClassInfo *Sci, uptr BlockSize, - uptr BytesInFreeList, ReleaseToOS ReleaseType) - REQUIRES(Sci->Mutex) { - DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); - const uptr PageSize = getPageSizeCached(); + if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; - if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; - - // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value - // so that we won't underestimate the releasable pages. For example, the - // following is the region usage, - // - // BytesInFreeListAtLastCheckpoint AllocatedUser - // v v - // |---------------------------------------> - // ^ ^ - // BytesInFreeList ReleaseThreshold - // - // In general, if we have collected enough bytes and the amount of free - // bytes meets the ReleaseThreshold, we will try to do page release. If we - // don't update `BytesInFreeListAtLastCheckpoint` when the current - // `BytesInFreeList` is smaller, we may take longer time to wait for enough - // freed blocks because we miss the bytes between - // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). - const uptr PushedBytesDelta = - BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; - if (PushedBytesDelta < PageSize) + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). 
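// Illustrative sketch, not part of the patch: the checkpoint/threshold logic
// described above and implemented just below, restated with plain integers.
// All names and values here are placeholders for illustration only; the
// release-interval throttling that follows in the real code is omitted.
#include <cstdint>
inline bool worthAttemptingRelease(std::uint64_t BytesInFreeList,
                                   std::uint64_t BytesAtLastCheckpoint,
                                   std::uint64_t AllocatedUser,
                                   std::uint64_t PageSize, bool SmallBlock) {
  // Only bytes freed since the last checkpoint count toward the threshold.
  const std::uint64_t PushedBytesDelta =
      BytesInFreeList > BytesAtLastCheckpoint
          ? BytesInFreeList - BytesAtLastCheckpoint
          : 0;
  if (PushedBytesDelta < PageSize) // less than one page freed: not worth it
    return false;
  // Small blocks additionally require 1/16 of the allocated bytes to have
  // been freed since the checkpoint (mirrors the isSmallBlock() gate below).
  if (SmallBlock && PushedBytesDelta < AllocatedUser / 16U)
    return false;
  return true;
}
// Example with assumed numbers: PageSize = 4096, AllocatedUser = 1 MiB and
// 8 KiB freed since the checkpoint pass the page gate, but the small-block
// gate (which needs 64 KiB) still rejects the attempt.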
+ const uptr PushedBytesDelta =
+ BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
+ if (PushedBytesDelta < PageSize)
+ return false;
+
+ // Releasing smaller blocks is expensive, so we want to make sure that a
+ // significant amount of bytes are free, and that there has been a good
+ // amount of batches pushed to the freelist before attempting to release.
+ if (isSmallBlock(BlockSize) && ReleaseType == ReleaseToOS::Normal)
+ if (PushedBytesDelta < Sci->AllocatedUser / 16U)
 return false;
- // Releasing smaller blocks is expensive, so we want to make sure that a
- // significant amount of bytes are free, and that there has been a good
- // amount of batches pushed to the freelist before attempting to release.
- if (isSmallBlock(BlockSize) && ReleaseType == ReleaseToOS::Normal)
- if (PushedBytesDelta < Sci->AllocatedUser / 16U)
- return false;
+ if (ReleaseType == ReleaseToOS::Normal) {
+ const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
+ if (IntervalMs < 0)
+ return false;
- if (ReleaseType == ReleaseToOS::Normal) {
- const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
- if (IntervalMs < 0)
+ // The constant 8 here is selected from profiling some apps: the number
+ // of unreleased pages in the large size classes is around 16 pages or
+ // more. Choose half of it as a heuristic, which also avoids doing a page
+ // release on every pushBlocks() attempt by large blocks.
+ const bool ByPassReleaseInterval =
+ isLargeBlock(BlockSize) && PushedBytesDelta > 8 * PageSize;
+ if (!ByPassReleaseInterval) {
+ if (Sci->ReleaseInfo.LastReleaseAtNs +
+ static_cast(IntervalMs) * 1000000 >
+ getMonotonicTimeFast()) {
+ // Memory was returned recently.
 return false;
-
- // The constant 8 here is selected from profiling some apps and the number
- // of unreleased pages in the large size classes is around 16 pages or
- // more. Choose half of it as a heuristic and which also avoids page
- // release every time for every pushBlocks() attempt by large blocks.
- const bool ByPassReleaseInterval =
- isLargeBlock(BlockSize) && PushedBytesDelta > 8 * PageSize;
- if (!ByPassReleaseInterval) {
- if (Sci->ReleaseInfo.LastReleaseAtNs +
- static_cast(IntervalMs) * 1000000 >
- getMonotonicTimeFast()) {
- // Memory was returned recently.
- return false; - } } - } // if (ReleaseType == ReleaseToOS::Normal) - - return true; - } - - PageReleaseContext markFreeBlocks(SizeClassInfo *Sci, const uptr ClassId, - const uptr BlockSize, const uptr Base, - const uptr NumberOfRegions, - ReleaseToOS ReleaseType) - REQUIRES(Sci->Mutex) { - const uptr PageSize = getPageSizeCached(); - const uptr GroupSize = (1UL << GroupSizeLog); - const uptr CurGroupBase = - compactPtrGroupBase(compactPtr(ClassId, Sci->CurrentRegion)); - - PageReleaseContext Context(BlockSize, NumberOfRegions, - /*ReleaseSize=*/RegionSize); + } + } // if (ReleaseType == ReleaseToOS::Normal) + + return true; +} + +template +PageReleaseContext SizeClassAllocator32::markFreeBlocks( + SizeClassInfo *Sci, const uptr ClassId, const uptr BlockSize, + const uptr Base, const uptr NumberOfRegions, ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex) { + const uptr PageSize = getPageSizeCached(); + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr CurGroupBase = + compactPtrGroupBase(compactPtr(ClassId, Sci->CurrentRegion)); + + PageReleaseContext Context(BlockSize, NumberOfRegions, + /*ReleaseSize=*/RegionSize); + + auto DecompactPtr = [](CompactPtrT CompactPtr) { + return reinterpret_cast(CompactPtr); + }; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + const uptr GroupBase = decompactGroupBase(BG.CompactPtrGroupBase); + // The `GroupSize` may not be divided by `BlockSize`, which means there is + // an unused space at the end of Region. Exclude that space to avoid + // unused page map entry. + uptr AllocatedGroupSize = GroupBase == CurGroupBase + ? Sci->CurrentRegionAllocated + : roundDownSlow(GroupSize, BlockSize); + if (AllocatedGroupSize == 0) + continue; + + // Batches are pushed in front of BG.Batches. The first one may + // not have all caches used. + const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + + BG.Batches.front()->getCount(); + const uptr BytesInBG = NumBlocks * BlockSize; + + if (ReleaseType != ReleaseToOS::ForceAll) { + if (BytesInBG <= BG.BytesInBGAtLastCheckpoint) { + BG.BytesInBGAtLastCheckpoint = BytesInBG; + continue; + } - auto DecompactPtr = [](CompactPtrT CompactPtr) { - return reinterpret_cast(CompactPtr); - }; - for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { - const uptr GroupBase = decompactGroupBase(BG.CompactPtrGroupBase); - // The `GroupSize` may not be divided by `BlockSize`, which means there is - // an unused space at the end of Region. Exclude that space to avoid - // unused page map entry. - uptr AllocatedGroupSize = GroupBase == CurGroupBase - ? Sci->CurrentRegionAllocated - : roundDownSlow(GroupSize, BlockSize); - if (AllocatedGroupSize == 0) + const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint; + if (PushedBytesDelta < PageSize) continue; - // Batches are pushed in front of BG.Batches. The first one may - // not have all caches used. - const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + - BG.Batches.front()->getCount(); - const uptr BytesInBG = NumBlocks * BlockSize; - - if (ReleaseType != ReleaseToOS::ForceAll) { - if (BytesInBG <= BG.BytesInBGAtLastCheckpoint) { - BG.BytesInBGAtLastCheckpoint = BytesInBG; - continue; - } - - const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint; - if (PushedBytesDelta < PageSize) - continue; - - // Given the randomness property, we try to release the pages only if - // the bytes used by free blocks exceed certain proportion of allocated - // spaces. 
- if (isSmallBlock(BlockSize) && (BytesInBG * 100U) / AllocatedGroupSize <
- (100U - 1U - BlockSize / 16U)) {
- continue;
- }
+ // Given the randomness property, we try to release the pages only if
+ // the bytes used by free blocks exceed a certain proportion of the
+ // allocated space.
+ if (isSmallBlock(BlockSize) && (BytesInBG * 100U) / AllocatedGroupSize <
+ (100U - 1U - BlockSize / 16U)) {
+ continue;
 }
+ }
- // TODO: Consider updating this after page release if `ReleaseRecorder`
- // can tell the released bytes in each group.
- BG.BytesInBGAtLastCheckpoint = BytesInBG;
+ // TODO: Consider updating this after page release if `ReleaseRecorder`
+ // can tell the released bytes in each group.
+ BG.BytesInBGAtLastCheckpoint = BytesInBG;
- const uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
- const uptr RegionIndex = (GroupBase - Base) / RegionSize;
+ const uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
+ const uptr RegionIndex = (GroupBase - Base) / RegionSize;
- if (NumBlocks == MaxContainedBlocks) {
- for (const auto &It : BG.Batches)
- for (u16 I = 0; I < It.getCount(); ++I)
- DCHECK_EQ(compactPtrGroupBase(It.get(I)), BG.CompactPtrGroupBase);
+ if (NumBlocks == MaxContainedBlocks) {
+ for (const auto &It : BG.Batches)
+ for (u16 I = 0; I < It.getCount(); ++I)
+ DCHECK_EQ(compactPtrGroupBase(It.get(I)), BG.CompactPtrGroupBase);
- const uptr To = GroupBase + AllocatedGroupSize;
- Context.markRangeAsAllCounted(GroupBase, To, GroupBase, RegionIndex,
- AllocatedGroupSize);
- } else {
- DCHECK_LT(NumBlocks, MaxContainedBlocks);
-
- // Note that we don't always visit blocks in each BatchGroup so that we
- // may miss the chance of releasing certain pages that cross
- // BatchGroups.
- Context.markFreeBlocksInRegion(BG.Batches, DecompactPtr, GroupBase,
- RegionIndex, AllocatedGroupSize,
- /*MayContainLastBlockInRegion=*/true);
- }
-
- // We may not be able to do the page release In a rare case that we may
- // fail on PageMap allocation.
- if (UNLIKELY(!Context.hasBlockMarked()))
- break;
+ const uptr To = GroupBase + AllocatedGroupSize;
+ Context.markRangeAsAllCounted(GroupBase, To, GroupBase, RegionIndex,
+ AllocatedGroupSize);
+ } else {
+ DCHECK_LT(NumBlocks, MaxContainedBlocks);
+
+ // Note that we don't always visit blocks in each BatchGroup so that we
+ // may miss the chance of releasing certain pages that cross
+ // BatchGroups.
+ Context.markFreeBlocksInRegion(BG.Batches, DecompactPtr, GroupBase,
+ RegionIndex, AllocatedGroupSize,
+ /*MayContainLastBlockInRegion=*/true);
 }
- return Context;
+ // We may not be able to do the page release in the rare case that the
+ // PageMap allocation fails.
+ if (UNLIKELY(!Context.hasBlockMarked()))
+ break;
 }
- SizeClassInfo SizeClassInfoArray[NumClasses] = {};
-
- HybridMutex ByteMapMutex;
- // Track the regions in use, 0 is unused, otherwise store ClassId + 1.
- ByteMap PossibleRegions GUARDED_BY(ByteMapMutex) = {};
- atomic_s32 ReleaseToOsIntervalMs = {};
- // Unless several threads request regions simultaneously from different size
- // classes, the stash rarely contains more than 1 entry.
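// A worked example, not part of the patch, of the
// (100U - 1U - BlockSize / 16U) check in markFreeBlocks() above: BytesInBG
// must cover at least that percentage of AllocatedGroupSize before a
// small-block group is visited, e.g.
//   BlockSize =  32 -> 100 - 1 - 2 = 97% of the group must be free
//   BlockSize =  64 -> 100 - 1 - 4 = 95%
//   BlockSize = 128 -> 100 - 1 - 8 = 91%
// so the smaller the block, the more of its group has to sit in the freelist
// before its pages are considered for release.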
- static constexpr uptr MaxStashedRegions = 4; - HybridMutex RegionsStashMutex; - uptr NumberOfStashedRegions GUARDED_BY(RegionsStashMutex) = 0; - uptr RegionsStash[MaxStashedRegions] GUARDED_BY(RegionsStashMutex) = {}; -}; + return Context; +} } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index e70dbe91d7acd..3cb040c514eda 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -83,463 +83,65 @@ template class SizeClassAllocator64 { } static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - static bool conditionVariableEnabled() { return Config::hasConditionVariableT(); } + static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } + static BlockInfo findNearestBlock(const char *RegionInfoData, + uptr Ptr) NO_THREAD_SAFETY_ANALYSIS; - void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { - DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); - - const uptr PageSize = getPageSizeCached(); - const uptr GroupSize = (1UL << GroupSizeLog); - const uptr PagesInGroup = GroupSize / PageSize; - const uptr MinSizeClass = getSizeByClassId(1); - // When trying to release pages back to memory, visiting smaller size - // classes is expensive. Therefore, we only try to release smaller size - // classes when the amount of free blocks goes over a certain threshold (See - // the comment in releaseToOSMaybe() for more details). For example, for - // size class 32, we only do the release when the size of free blocks is - // greater than 97% of pages in a group. However, this may introduce another - // issue that if the number of free blocks is bouncing between 97% ~ 100%. - // Which means we may try many page releases but only release very few of - // them (less than 3% in a group). Even though we have - // `&ReleaseToOsIntervalMs` which slightly reduce the frequency of these - // calls but it will be better to have another guard to mitigate this issue. - // - // Here we add another constraint on the minimum size requirement. The - // constraint is determined by the size of in-use blocks in the minimal size - // class. Take size class 32 as an example, - // - // +- one memory group -+ - // +----------------------+------+ - // | 97% of free blocks | | - // +----------------------+------+ - // \ / - // 3% in-use blocks - // - // * The release size threshold is 97%. - // - // The 3% size in a group is about 7 pages. For two consecutive - // releaseToOSMaybe(), we require the difference between `PushedBlocks` - // should be greater than 7 pages. This mitigates the page releasing - // thrashing which is caused by memory usage bouncing around the threshold. - // The smallest size class takes longest time to do the page release so we - // use its size of in-use blocks as a heuristic. - SmallerBlockReleasePageDelta = - PagesInGroup * (1 + MinSizeClass / 16U) / 100; - - u32 Seed; - const u64 Time = getMonotonicTimeFast(); - if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed))) - Seed = static_cast(Time ^ (reinterpret_cast(&Seed) >> 12)); - - for (uptr I = 0; I < NumClasses; I++) - getRegionInfo(I)->RandState = getRandomU32(&Seed); - - if (Config::getEnableContiguousRegions()) { - ReservedMemoryT ReservedMemory = {}; - // Reserve the space required for the Primary. 
- CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses, - "scudo:primary_reserve")); - const uptr PrimaryBase = ReservedMemory.getBase(); - - for (uptr I = 0; I < NumClasses; I++) { - MemMapT RegionMemMap = ReservedMemory.dispatch( - PrimaryBase + (I << RegionSizeLog), RegionSize); - RegionInfo *Region = getRegionInfo(I); - - initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset()); - } - shuffle(RegionInfoArray, NumClasses, &Seed); - } - - // The binding should be done after region shuffling so that it won't bind - // the FLLock from the wrong region. - for (uptr I = 0; I < NumClasses; I++) - getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock); - - // The default value in the primary config has the higher priority. - if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) - ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); - setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); - } + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS; - void unmapTestOnly() { - for (uptr I = 0; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); - { - ScopedLock ML(Region->MMLock); - MemMapT MemMap = Region->MemMapInfo.MemMap; - if (MemMap.isAllocated()) - MemMap.unmap(); - } - *Region = {}; - } - } + void unmapTestOnly(); // When all blocks are freed, it has to be the same size as `AllocatedUser`. - void verifyAllBlocksAreReleasedTestOnly() { - // `BatchGroup` and `Batch` also use the blocks from BatchClass. - uptr BatchClassUsedInFreeLists = 0; - for (uptr I = 0; I < NumClasses; I++) { - // We have to count BatchClassUsedInFreeLists in other regions first. - if (I == SizeClassMap::BatchClassId) - continue; - RegionInfo *Region = getRegionInfo(I); - ScopedLock ML(Region->MMLock); - ScopedLock FL(Region->FLLock); - const uptr BlockSize = getSizeByClassId(I); - uptr TotalBlocks = 0; - for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { - // `BG::Batches` are `Batches`. +1 for `BatchGroup`. - BatchClassUsedInFreeLists += BG.Batches.size() + 1; - for (const auto &It : BG.Batches) - TotalBlocks += It.getCount(); - } - - DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize); - DCHECK_EQ(Region->FreeListInfo.PushedBlocks, - Region->FreeListInfo.PoppedBlocks); - } - - RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId); - ScopedLock ML(Region->MMLock); - ScopedLock FL(Region->FLLock); - const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); - uptr TotalBlocks = 0; - for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { - if (LIKELY(!BG.Batches.empty())) { - for (const auto &It : BG.Batches) - TotalBlocks += It.getCount(); - } else { - // `BatchGroup` with empty freelist doesn't have `Batch` record - // itself. 
- ++TotalBlocks; - } - } - DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, - Region->MemMapInfo.AllocatedUser / BlockSize); - DCHECK_GE(Region->FreeListInfo.PoppedBlocks, - Region->FreeListInfo.PushedBlocks); - const uptr BlocksInUse = - Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; - DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); - } + void verifyAllBlocksAreReleasedTestOnly(); u16 popBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - CompactPtrT *ToArray, const u16 MaxBlockCount) { - DCHECK_LT(ClassId, NumClasses); - RegionInfo *Region = getRegionInfo(ClassId); - u16 PopCount = 0; - - { - ScopedLock L(Region->FLLock); - PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray, - MaxBlockCount); - if (PopCount != 0U) - return PopCount; - } - - bool ReportRegionExhausted = false; - - if (conditionVariableEnabled()) { - PopCount = popBlocksWithCV(SizeClassAllocator, ClassId, Region, ToArray, - MaxBlockCount, ReportRegionExhausted); - } else { - while (true) { - // When two threads compete for `Region->MMLock`, we only want one of - // them to call populateFreeListAndPopBlocks(). To avoid both of them - // doing that, always check the freelist before mapping new pages. - ScopedLock ML(Region->MMLock); - { - ScopedLock FL(Region->FLLock); - PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray, - MaxBlockCount); - if (PopCount != 0U) - return PopCount; - } - - const bool RegionIsExhausted = Region->Exhausted; - if (!RegionIsExhausted) { - PopCount = populateFreeListAndPopBlocks( - SizeClassAllocator, ClassId, Region, ToArray, MaxBlockCount); - } - ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; - break; - } - } - - if (UNLIKELY(ReportRegionExhausted)) { - Printf("Can't populate more pages for size class %zu.\n", - getSizeByClassId(ClassId)); - - // Theoretically, BatchClass shouldn't be used up. Abort immediately when - // it happens. - if (ClassId == SizeClassMap::BatchClassId) - reportOutOfBatchClass(); - } - - return PopCount; - } + CompactPtrT *ToArray, const u16 MaxBlockCount); // Push the array of free blocks to the designated batch group. void pushBlocks(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - CompactPtrT *Array, u32 Size) { - DCHECK_LT(ClassId, NumClasses); - DCHECK_GT(Size, 0); - - RegionInfo *Region = getRegionInfo(ClassId); - if (ClassId == SizeClassMap::BatchClassId) { - ScopedLock L(Region->FLLock); - pushBatchClassBlocks(Region, Array, Size); - if (conditionVariableEnabled()) - Region->FLLockCV.notifyAll(Region->FLLock); - return; - } - - // TODO(chiahungduan): Consider not doing grouping if the group size is not - // greater than the block size with a certain scale. - - bool SameGroup = true; - if (GroupSizeLog < RegionSizeLog) { - // Sort the blocks so that blocks belonging to the same group can be - // pushed together. - for (u32 I = 1; I < Size; ++I) { - if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) - SameGroup = false; - CompactPtrT Cur = Array[I]; - u32 J = I; - while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { - Array[J] = Array[J - 1]; - --J; - } - Array[J] = Cur; - } - } - - { - ScopedLock L(Region->FLLock); - pushBlocksImpl(SizeClassAllocator, ClassId, Region, Array, Size, - SameGroup); - if (conditionVariableEnabled()) - Region->FLLockCV.notifyAll(Region->FLLock); - } - } - - void disable() NO_THREAD_SAFETY_ANALYSIS { - // The BatchClassId must be locked last since other classes can use it. 
- for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { - if (static_cast(I) == SizeClassMap::BatchClassId) - continue; - getRegionInfo(static_cast(I))->MMLock.lock(); - getRegionInfo(static_cast(I))->FLLock.lock(); - } - getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock(); - getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock(); - } - - void enable() NO_THREAD_SAFETY_ANALYSIS { - getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock(); - getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock(); - for (uptr I = 0; I < NumClasses; I++) { - if (I == SizeClassMap::BatchClassId) - continue; - getRegionInfo(I)->FLLock.unlock(); - getRegionInfo(I)->MMLock.unlock(); - } - } - - template void iterateOverBlocks(F Callback) { - for (uptr I = 0; I < NumClasses; I++) { - if (I == SizeClassMap::BatchClassId) - continue; - RegionInfo *Region = getRegionInfo(I); - // TODO: The call of `iterateOverBlocks` requires disabling - // SizeClassAllocator64. We may consider locking each region on demand - // only. - Region->FLLock.assertHeld(); - Region->MMLock.assertHeld(); - const uptr BlockSize = getSizeByClassId(I); - const uptr From = Region->RegionBeg; - const uptr To = From + Region->MemMapInfo.AllocatedUser; - for (uptr Block = From; Block < To; Block += BlockSize) - Callback(Block); - } - } - - void getStats(ScopedString *Str) { - // TODO(kostyak): get the RSS per region. - uptr TotalMapped = 0; - uptr PoppedBlocks = 0; - uptr PushedBlocks = 0; - for (uptr I = 0; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); - { - ScopedLock L(Region->MMLock); - TotalMapped += Region->MemMapInfo.MappedUser; - } - { - ScopedLock L(Region->FLLock); - PoppedBlocks += Region->FreeListInfo.PoppedBlocks; - PushedBlocks += Region->FreeListInfo.PushedBlocks; - } - } - const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); - Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu " - "allocations; remains %zu; ReleaseToOsIntervalMs = %d\n", - TotalMapped >> 20, 0U, PoppedBlocks, - PoppedBlocks - PushedBlocks, IntervalMs >= 0 ? IntervalMs : -1); - - for (uptr I = 0; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); - ScopedLock L1(Region->MMLock); - ScopedLock L2(Region->FLLock); - getStats(Str, I, Region); - } - } - - void getFragmentationInfo(ScopedString *Str) { - Str->append( - "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", - getPageSizeCached()); + CompactPtrT *Array, u32 Size); - for (uptr I = 1; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); - ScopedLock L(Region->MMLock); - getRegionFragmentationInfo(Region, I, Str); - } - } + void disable() NO_THREAD_SAFETY_ANALYSIS; + void enable() NO_THREAD_SAFETY_ANALYSIS; - void getMemoryGroupFragmentationInfo(ScopedString *Str) { - Str->append( - "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", - getPageSizeCached()); - - for (uptr I = 1; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); - ScopedLock L(Region->MMLock); - getMemoryGroupFragmentationInfoInRegion(Region, I, Str); - } - } + template void iterateOverBlocks(F Callback); - bool setOption(Option O, sptr Value) { - if (O == Option::ReleaseInterval) { - const s32 Interval = Max( - Min(static_cast(Value), Config::getMaxReleaseToOsIntervalMs()), - Config::getMinReleaseToOsIntervalMs()); - atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); - return true; - } - // Not supported by the Primary, but not an error either. 
- return true; - } + void getStats(ScopedString *Str); + void getFragmentationInfo(ScopedString *Str); + void getMemoryGroupFragmentationInfo(ScopedString *Str); - uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) { - RegionInfo *Region = getRegionInfo(ClassId); - // Note that the tryLock() may fail spuriously, given that it should rarely - // happen and page releasing is fine to skip, we don't take certain - // approaches to ensure one page release is done. - if (Region->MMLock.tryLock()) { - uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType); - Region->MMLock.unlock(); - return BytesReleased; - } - return 0; - } + bool setOption(Option O, sptr Value); - uptr releaseToOS(ReleaseToOS ReleaseType) { - uptr TotalReleasedBytes = 0; - for (uptr I = 0; I < NumClasses; I++) { - if (I == SizeClassMap::BatchClassId) - continue; - RegionInfo *Region = getRegionInfo(I); - ScopedLock L(Region->MMLock); - TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType); - } - return TotalReleasedBytes; - } + // These are used for returning unused pages. Note that it doesn't unmap the + // pages, it only suggests that the physical pages can be released. + uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType); + uptr releaseToOS(ReleaseToOS ReleaseType); const char *getRegionInfoArrayAddress() const { return reinterpret_cast(RegionInfoArray); } - static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } - uptr getCompactPtrBaseByClassId(uptr ClassId) { return getRegionInfo(ClassId)->RegionBeg; } - CompactPtrT compactPtr(uptr ClassId, uptr Ptr) { DCHECK_LE(ClassId, SizeClassMap::LargestClassId); return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr); } - void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) { DCHECK_LE(ClassId, SizeClassMap::LargestClassId); return reinterpret_cast( decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr)); } - static BlockInfo findNearestBlock(const char *RegionInfoData, - uptr Ptr) NO_THREAD_SAFETY_ANALYSIS { - const RegionInfo *RegionInfoArray = - reinterpret_cast(RegionInfoData); - - uptr ClassId; - uptr MinDistance = -1UL; - for (uptr I = 0; I != NumClasses; ++I) { - if (I == SizeClassMap::BatchClassId) - continue; - uptr Begin = RegionInfoArray[I].RegionBeg; - // TODO(chiahungduan): In fact, We need to lock the RegionInfo::MMLock. - // However, the RegionInfoData is passed with const qualifier and lock the - // mutex requires modifying RegionInfoData, which means we need to remove - // the const qualifier. This may lead to another undefined behavior (The - // first one is accessing `AllocatedUser` without locking. It's better to - // pass `RegionInfoData` as `void *` then we can lock the mutex properly. 
- uptr End = Begin + RegionInfoArray[I].MemMapInfo.AllocatedUser; - if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) - continue; - uptr RegionDistance; - if (Begin <= Ptr) { - if (Ptr < End) - RegionDistance = 0; - else - RegionDistance = Ptr - End; - } else { - RegionDistance = Begin - Ptr; - } - - if (RegionDistance < MinDistance) { - MinDistance = RegionDistance; - ClassId = I; - } - } - - BlockInfo B = {}; - if (MinDistance <= 8192) { - B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; - B.RegionEnd = - B.RegionBegin + RegionInfoArray[ClassId].MemMapInfo.AllocatedUser; - B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); - B.BlockBegin = - B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) * - sptr(B.BlockSize)); - while (B.BlockBegin < B.RegionBegin) - B.BlockBegin += B.BlockSize; - while (B.RegionEnd < B.BlockBegin + B.BlockSize) - B.BlockBegin -= B.BlockSize; - } - return B; - } - AtomicOptions Options; private: static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; - static const uptr MapSizeIncrement = Config::getMapSizeIncrement(); // Fill at most this number of batches from the newly map'd memory. static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U; @@ -607,1254 +209,1756 @@ template class SizeClassAllocator64 { return Region->MemMapInfo.MemMap.getBase(); } - static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { + CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) const { return static_cast((Ptr - Base) >> CompactPtrScale); } - - static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) { + uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) const { return Base + (static_cast(CompactPtr) << CompactPtrScale); } - - static uptr compactPtrGroup(CompactPtrT CompactPtr) { + uptr compactPtrGroup(CompactPtrT CompactPtr) const { const uptr Mask = (static_cast(1) << GroupScale) - 1; return static_cast(CompactPtr) & ~Mask; } - static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) { + uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) const { DCHECK_EQ(CompactPtrGroupBase % (static_cast(1) << (GroupScale)), 0U); return Base + (CompactPtrGroupBase << CompactPtrScale); } - - ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + ALWAYS_INLINE bool isSmallBlock(uptr BlockSize) const { const uptr PageSize = getPageSizeCached(); return BlockSize < PageSize / 16U; } - ALWAYS_INLINE uptr getMinReleaseAttemptSize(uptr BlockSize) { return roundUp(BlockSize, getPageSizeCached()); } ALWAYS_INLINE void initRegion(RegionInfo *Region, uptr ClassId, MemMapT MemMap, bool EnableRandomOffset) - REQUIRES(Region->MMLock) { - DCHECK(!Region->MemMapInfo.MemMap.isAllocated()); - DCHECK(MemMap.isAllocated()); + REQUIRES(Region->MMLock); - const uptr PageSize = getPageSizeCached(); + void pushBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, + RegionInfo *Region, CompactPtrT *Array, u32 Size, + bool SameGroup = false) REQUIRES(Region->FLLock); - Region->MemMapInfo.MemMap = MemMap; + // Similar to `pushBlocksImpl` but has some logics specific to BatchClass. + void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size) + REQUIRES(Region->FLLock); - Region->RegionBeg = MemMap.getBase(); - if (EnableRandomOffset) { - Region->RegionBeg += - (getRandomModN(&Region->RandState, 16) + 1) * PageSize; - } + // Pop at most `MaxBlockCount` from the freelist of the given region. 
+ u16 popBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, + RegionInfo *Region, CompactPtrT *ToArray, + const u16 MaxBlockCount) REQUIRES(Region->FLLock); + // Same as `popBlocksImpl` but is used when conditional variable is enabled. + u16 popBlocksWithCV(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, + RegionInfo *Region, CompactPtrT *ToArray, + const u16 MaxBlockCount, bool &ReportRegionExhausted); - const uptr BlockSize = getSizeByClassId(ClassId); - // Releasing small blocks is expensive, set a higher threshold to avoid - // frequent page releases. - if (isSmallBlock(BlockSize)) { - Region->ReleaseInfo.TryReleaseThreshold = - PageSize * SmallerBlockReleasePageDelta; - } else { - Region->ReleaseInfo.TryReleaseThreshold = - getMinReleaseAttemptSize(BlockSize); - } - } + // When there's no blocks available in the freelist, it tries to prepare more + // blocks by mapping more pages. + NOINLINE u16 populateFreeListAndPopBlocks( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, RegionInfo *Region, + CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Region->MMLock) + EXCLUDES(Region->FLLock); - void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size) - REQUIRES(Region->FLLock) { - DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId)); - - // Free blocks are recorded by Batch in freelist for all - // size-classes. In addition, Batch is allocated from BatchClassId. - // In order not to use additional block to record the free blocks in - // BatchClassId, they are self-contained. I.e., A Batch records the - // block address of itself. See the figure below: - // - // Batch at 0xABCD - // +----------------------------+ - // | Free blocks' addr | - // | +------+------+------+ | - // | |0xABCD|... |... | | - // | +------+------+------+ | - // +----------------------------+ - // - // When we allocate all the free blocks in the Batch, the block used - // by Batch is also free for use. We don't need to recycle the - // Batch. Note that the correctness is maintained by the invariant, - // - // Each popBlocks() request returns the entire Batch. Returning - // part of the blocks in a Batch is invalid. - // - // This ensures that Batch won't leak the address itself while it's - // still holding other valid data. - // - // Besides, BatchGroup is also allocated from BatchClassId and has its - // address recorded in the Batch too. To maintain the correctness, - // - // The address of BatchGroup is always recorded in the last Batch - // in the freelist (also imply that the freelist should only be - // updated with push_front). Once the last Batch is popped, - // the block used by BatchGroup is also free for use. - // - // With this approach, the blocks used by BatchGroup and Batch are - // reusable and don't need additional space for them. 
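// An illustrative trace of the self-contained BatchClass invariant described
// above; this is not part of the patch and the two addresses are made up.
// Pushing the BatchClass blocks 0xA000 and 0xB000 onto an empty freelist
// yields:
//
//   BatchGroup -> constructed in place inside the block at 0xB000
//     Batches  -> one Batch, constructed in place inside the block at 0xA000,
//                 recording the compact pointers of { 0xA000, 0xB000 }
//
// Popping that Batch hands both 0xA000 and 0xB000 back to the caller, so the
// memory holding the Batch and the BatchGroup is recycled implicitly and no
// extra bookkeeping block is ever needed.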
+ void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->MMLock, Region->FLLock); + void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId, + ScopedString *Str) REQUIRES(Region->MMLock); + void getMemoryGroupFragmentationInfoInRegion(RegionInfo *Region, uptr ClassId, + ScopedString *Str) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock); - Region->FreeListInfo.PushedBlocks += Size; - BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock); + bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize, + uptr BytesInFreeList, ReleaseToOS ReleaseType) + REQUIRES(Region->MMLock, Region->FLLock); + SinglyLinkedList + collectGroupsToRelease(RegionInfo *Region, const uptr BlockSize, + const uptr AllocatedUserEnd, const uptr CompactPtrBase) + REQUIRES(Region->MMLock, Region->FLLock); + PageReleaseContext + markFreeBlocks(RegionInfo *Region, const uptr BlockSize, + const uptr AllocatedUserEnd, const uptr CompactPtrBase, + SinglyLinkedList &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock); - if (BG == nullptr) { - // Construct `BatchGroup` on the last element. - BG = reinterpret_cast( - decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); - --Size; - BG->Batches.clear(); - // BatchClass hasn't enabled memory group. Use `0` to indicate there's no - // memory group here. - BG->CompactPtrGroupBase = 0; - BG->BytesInBGAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = SizeClassAllocatorT::getMaxCached( - getSizeByClassId(SizeClassMap::BatchClassId)); - - Region->FreeListInfo.BlockList.push_front(BG); - } + void mergeGroupsToReleaseBack(RegionInfo *Region, + SinglyLinkedList &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock); - if (UNLIKELY(Size == 0)) - return; - - // This happens under 2 cases. - // 1. just allocated a new `BatchGroup`. - // 2. Only 1 block is pushed when the freelist is empty. - if (BG->Batches.empty()) { - // Construct the `Batch` on the last element. - BatchT *TB = reinterpret_cast( - decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); - TB->clear(); - // As mentioned above, addresses of `Batch` and `BatchGroup` are - // recorded in the Batch. - TB->add(Array[Size - 1]); - TB->add( - compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(BG))); - --Size; - BG->Batches.push_front(TB); - } + // The minimum size of pushed blocks that we will try to release the pages in + // that size class. + uptr SmallerBlockReleasePageDelta = 0; + atomic_s32 ReleaseToOsIntervalMs = {}; + alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; +}; - BatchT *CurBatch = BG->Batches.front(); - DCHECK_NE(CurBatch, nullptr); +template +void SizeClassAllocator64::init(s32 ReleaseToOsInterval) + NO_THREAD_SAFETY_ANALYSIS { + DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); + + const uptr PageSize = getPageSizeCached(); + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr PagesInGroup = GroupSize / PageSize; + const uptr MinSizeClass = getSizeByClassId(1); + // When trying to release pages back to memory, visiting smaller size + // classes is expensive. Therefore, we only try to release smaller size + // classes when the amount of free blocks goes over a certain threshold (See + // the comment in releaseToOSMaybe() for more details). 
For example, for
+ // size class 32, we only do the release when the size of free blocks is
+ // greater than 97% of pages in a group. However, this may introduce another
+ // issue: if the number of free blocks keeps bouncing between 97% and 100%,
+ // we may try many page releases but only release very few pages (less than
+ // 3% of a group). Even though `ReleaseToOsIntervalMs` slightly reduces the
+ // frequency of these calls, it is better to have another guard to mitigate
+ // this issue.
+ //
+ // Here we add another constraint on the minimum size requirement. The
+ // constraint is determined by the size of in-use blocks in the minimal size
+ // class. Take size class 32 as an example,
+ //
+ //   +-  one memory group  -+
+ //   +----------------------+------+
+ //   |  97% of free blocks  |      |
+ //   +----------------------+------+
+ //                           \    /
+ //                     3% in-use blocks
+ //
+ // * The release size threshold is 97%.
+ //
+ // The 3% size in a group is about 7 pages. For two consecutive
+ // releaseToOSMaybe(), we require the difference in `PushedBlocks` to be
+ // greater than 7 pages. This mitigates the page releasing
+ // thrashing which is caused by memory usage bouncing around the threshold.
+ // The smallest size class takes the longest time to do the page release so we
+ // use its size of in-use blocks as a heuristic.
+ SmallerBlockReleasePageDelta = PagesInGroup * (1 + MinSizeClass / 16U) / 100;
+
+ u32 Seed;
+ const u64 Time = getMonotonicTimeFast();
+ if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed)))
+ Seed = static_cast(Time ^ (reinterpret_cast(&Seed) >> 12));
+
+ for (uptr I = 0; I < NumClasses; I++)
+ getRegionInfo(I)->RandState = getRandomU32(&Seed);
+
+ if (Config::getEnableContiguousRegions()) {
+ ReservedMemoryT ReservedMemory = {};
+ // Reserve the space required for the Primary.
+ CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses,
+ "scudo:primary_reserve"));
+ const uptr PrimaryBase = ReservedMemory.getBase();
- for (u32 I = 0; I < Size;) {
- u16 UnusedSlots =
- static_cast(BG->MaxCachedPerBatch - CurBatch->getCount());
- if (UnusedSlots == 0) {
- CurBatch = reinterpret_cast(
- decompactPtr(SizeClassMap::BatchClassId, Array[I]));
- CurBatch->clear();
- // Self-contained
- CurBatch->add(Array[I]);
- ++I;
- // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of
- // BatchClassId.
- BG->Batches.push_front(CurBatch);
- UnusedSlots = static_cast(BG->MaxCachedPerBatch - 1);
- }
- // `UnusedSlots` is u16 so the result will be also fit in u16.
- const u16 AppendSize = static_cast(Min(UnusedSlots, Size - I));
- CurBatch->appendFromArray(&Array[I], AppendSize);
- I += AppendSize;
+ for (uptr I = 0; I < NumClasses; I++) {
+ MemMapT RegionMemMap = ReservedMemory.dispatch(
+ PrimaryBase + (I << RegionSizeLog), RegionSize);
+ RegionInfo *Region = getRegionInfo(I);
+
+ initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset());
 }
+ shuffle(RegionInfoArray, NumClasses, &Seed);
 }
- // Push the blocks to their batch group. The layout will be like,
- //
- // FreeListInfo.BlockList - > BG -> BG -> BG
- //                            |     |     |
- //                            v     v     v
- //                            TB    TB    TB
- //                            |
- //                            v
- //                            TB
- //
- // Each BlockGroup(BG) will associate with unique group id and the free blocks
- // are managed by a list of Batch(TB). To reduce the time of inserting
- // blocks, BGs are sorted and the input `Array` are supposed to be sorted so
- // that we can get better performance of maintaining sorted property.
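// A worked example, not part of the patch, of the SmallerBlockReleasePageDelta
// formula in SizeClassAllocator64::init() above, assuming a 4 KiB page size
// and 1 MiB memory groups (GroupSizeLog = 20):
//   PagesInGroup = (1 << 20) / 4096 = 256
//   MinSizeClass = 32  ->  SmallerBlockReleasePageDelta
//                        = 256 * (1 + 32 / 16) / 100 = 768 / 100 = 7 pages
// which matches the "about 7 pages" figure quoted in the comment, i.e. a
// small size class must accumulate roughly 7 pages' worth of newly pushed
// blocks before another release attempt is considered.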
- // Use `SameGroup=true` to indicate that all blocks in the array are from the - // same group then we will skip checking the group id of each block. - void pushBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - RegionInfo *Region, CompactPtrT *Array, u32 Size, - bool SameGroup = false) REQUIRES(Region->FLLock) { - DCHECK_NE(ClassId, SizeClassMap::BatchClassId); - DCHECK_GT(Size, 0U); - - auto CreateGroup = [&](uptr CompactPtrGroupBase) { - BatchGroupT *BG = reinterpret_cast( - SizeClassAllocator->getBatchClassBlock()); - BG->Batches.clear(); - BatchT *TB = - reinterpret_cast(SizeClassAllocator->getBatchClassBlock()); - TB->clear(); - - BG->CompactPtrGroupBase = CompactPtrGroupBase; - BG->Batches.push_front(TB); - BG->BytesInBGAtLastCheckpoint = 0; - BG->MaxCachedPerBatch = MaxNumBlocksInBatch; - - return BG; - }; - - auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { - SinglyLinkedList &Batches = BG->Batches; - BatchT *CurBatch = Batches.front(); - DCHECK_NE(CurBatch, nullptr); - - for (u32 I = 0; I < Size;) { - DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); - u16 UnusedSlots = - static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); - if (UnusedSlots == 0) { - CurBatch = reinterpret_cast( - SizeClassAllocator->getBatchClassBlock()); - CurBatch->clear(); - Batches.push_front(CurBatch); - UnusedSlots = BG->MaxCachedPerBatch; - } - // `UnusedSlots` is u16 so the result will be also fit in u16. - u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); - CurBatch->appendFromArray(&Array[I], AppendSize); - I += AppendSize; - } - }; + // The binding should be done after region shuffling so that it won't bind + // the FLLock from the wrong region. + for (uptr I = 0; I < NumClasses; I++) + getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock); - Region->FreeListInfo.PushedBlocks += Size; - BatchGroupT *Cur = Region->FreeListInfo.BlockList.front(); + // The default value in the primary config has the higher priority. + if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN) + ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs(); + setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); +} - // In the following, `Cur` always points to the BatchGroup for blocks that - // will be pushed next. `Prev` is the element right before `Cur`. - BatchGroupT *Prev = nullptr; +template +void SizeClassAllocator64::initRegion(RegionInfo *Region, uptr ClassId, + MemMapT MemMap, + bool EnableRandomOffset) + REQUIRES(Region->MMLock) { + DCHECK(!Region->MemMapInfo.MemMap.isAllocated()); + DCHECK(MemMap.isAllocated()); - while (Cur != nullptr && - compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) { - Prev = Cur; - Cur = Cur->Next; - } + const uptr PageSize = getPageSizeCached(); - if (Cur == nullptr || - compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) { - Cur = CreateGroup(compactPtrGroup(Array[0])); - if (Prev == nullptr) - Region->FreeListInfo.BlockList.push_front(Cur); - else - Region->FreeListInfo.BlockList.insert(Prev, Cur); - } + Region->MemMapInfo.MemMap = MemMap; - // All the blocks are from the same group, just push without checking group - // id. 
- if (SameGroup) { - for (u32 I = 0; I < Size; ++I) - DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase); + Region->RegionBeg = MemMap.getBase(); + if (EnableRandomOffset) { + Region->RegionBeg += (getRandomModN(&Region->RandState, 16) + 1) * PageSize; + } - InsertBlocks(Cur, Array, Size); - return; - } + const uptr BlockSize = getSizeByClassId(ClassId); + // Releasing small blocks is expensive, set a higher threshold to avoid + // frequent page releases. + if (isSmallBlock(BlockSize)) { + Region->ReleaseInfo.TryReleaseThreshold = + PageSize * SmallerBlockReleasePageDelta; + } else { + Region->ReleaseInfo.TryReleaseThreshold = + getMinReleaseAttemptSize(BlockSize); + } +} - // The blocks are sorted by group id. Determine the segment of group and - // push them to their group together. - u32 Count = 1; - for (u32 I = 1; I < Size; ++I) { - if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) { - DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase); - InsertBlocks(Cur, Array + I - Count, Count); - - while (Cur != nullptr && - compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) { - Prev = Cur; - Cur = Cur->Next; - } +template void SizeClassAllocator64::unmapTestOnly() { + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + { + ScopedLock ML(Region->MMLock); + MemMapT MemMap = Region->MemMapInfo.MemMap; + if (MemMap.isAllocated()) + MemMap.unmap(); + } + *Region = {}; + } +} + +template +void SizeClassAllocator64::verifyAllBlocksAreReleasedTestOnly() { + // `BatchGroup` and `Batch` also use the blocks from BatchClass. + uptr BatchClassUsedInFreeLists = 0; + for (uptr I = 0; I < NumClasses; I++) { + // We have to count BatchClassUsedInFreeLists in other regions first. + if (I == SizeClassMap::BatchClassId) + continue; + RegionInfo *Region = getRegionInfo(I); + ScopedLock ML(Region->MMLock); + ScopedLock FL(Region->FLLock); + const uptr BlockSize = getSizeByClassId(I); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { + // `BG::Batches` are `Batches`. +1 for `BatchGroup`. + BatchClassUsedInFreeLists += BG.Batches.size() + 1; + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } - if (Cur == nullptr || - compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) { - Cur = CreateGroup(compactPtrGroup(Array[I])); - DCHECK_NE(Prev, nullptr); - Region->FreeListInfo.BlockList.insert(Prev, Cur); - } + DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize); + DCHECK_EQ(Region->FreeListInfo.PushedBlocks, + Region->FreeListInfo.PoppedBlocks); + } - Count = 1; - } else { - ++Count; - } + RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId); + ScopedLock ML(Region->MMLock); + ScopedLock FL(Region->FLLock); + const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { + if (LIKELY(!BG.Batches.empty())) { + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } else { + // `BatchGroup` with empty freelist doesn't have `Batch` record + // itself. 
+ ++TotalBlocks; } - - InsertBlocks(Cur, Array + Size - Count, Count); + } + DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, + Region->MemMapInfo.AllocatedUser / BlockSize); + DCHECK_GE(Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks); + const uptr BlocksInUse = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); +} + +template +u16 SizeClassAllocator64::popBlocks( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, CompactPtrT *ToArray, + const u16 MaxBlockCount) { + DCHECK_LT(ClassId, NumClasses); + RegionInfo *Region = getRegionInfo(ClassId); + u16 PopCount = 0; + + { + ScopedLock L(Region->FLLock); + PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray, + MaxBlockCount); + if (PopCount != 0U) + return PopCount; } - u16 popBlocksWithCV(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - RegionInfo *Region, CompactPtrT *ToArray, - const u16 MaxBlockCount, bool &ReportRegionExhausted) { - u16 PopCount = 0; + bool ReportRegionExhausted = false; + if (conditionVariableEnabled()) { + PopCount = popBlocksWithCV(SizeClassAllocator, ClassId, Region, ToArray, + MaxBlockCount, ReportRegionExhausted); + } else { while (true) { - // We only expect one thread doing the freelist refillment and other - // threads will be waiting for either the completion of the - // `populateFreeListAndPopBlocks()` or `pushBlocks()` called by other - // threads. - bool PopulateFreeList = false; + // When two threads compete for `Region->MMLock`, we only want one of + // them to call populateFreeListAndPopBlocks(). To avoid both of them + // doing that, always check the freelist before mapping new pages. + ScopedLock ML(Region->MMLock); { ScopedLock FL(Region->FLLock); - if (!Region->isPopulatingFreeList) { - Region->isPopulatingFreeList = true; - PopulateFreeList = true; - } + PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray, + MaxBlockCount); + if (PopCount != 0U) + return PopCount; } - if (PopulateFreeList) { - ScopedLock ML(Region->MMLock); + const bool RegionIsExhausted = Region->Exhausted; + if (!RegionIsExhausted) { + PopCount = populateFreeListAndPopBlocks(SizeClassAllocator, ClassId, + Region, ToArray, MaxBlockCount); + } + ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; + break; + } + } - const bool RegionIsExhausted = Region->Exhausted; - if (!RegionIsExhausted) { - PopCount = populateFreeListAndPopBlocks( - SizeClassAllocator, ClassId, Region, ToArray, MaxBlockCount); - } - ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; - - { - // Before reacquiring the `FLLock`, the freelist may be used up again - // and some threads are waiting for the freelist refillment by the - // current thread. It's important to set - // `Region->isPopulatingFreeList` to false so the threads about to - // sleep will notice the status change. - ScopedLock FL(Region->FLLock); - Region->isPopulatingFreeList = false; - Region->FLLockCV.notifyAll(Region->FLLock); - } + if (UNLIKELY(ReportRegionExhausted)) { + Printf("Can't populate more pages for size class %zu.\n", + getSizeByClassId(ClassId)); - break; - } + // Theoretically, BatchClass shouldn't be used up. Abort immediately when + // it happens. + if (ClassId == SizeClassMap::BatchClassId) + reportOutOfBatchClass(); + } - // At here, there are two preconditions to be met before waiting, - // 1. The freelist is empty. - // 2. 
Region->isPopulatingFreeList == true, i.e, someone is still doing
- // `populateFreeListAndPopBlocks()`.
- //
- // Note that it has the chance that freelist is empty but
- // Region->isPopulatingFreeList == false because all the new populated
- // blocks were used up right after the refillment. Therefore, we have to
- // check if someone is still populating the freelist.
+ return PopCount;
+}
+
+template
+u16 SizeClassAllocator64::popBlocksWithCV(
+ SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, RegionInfo *Region,
+ CompactPtrT *ToArray, const u16 MaxBlockCount,
+ bool &ReportRegionExhausted) {
+ u16 PopCount = 0;
+
+ while (true) {
+ // We only expect one thread doing the freelist refillment and other
+ // threads will be waiting for either the completion of the
+ // `populateFreeListAndPopBlocks()` or `pushBlocks()` called by other
+ // threads.
+ bool PopulateFreeList = false;
+ {
 ScopedLock FL(Region->FLLock);
- PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray,
- MaxBlockCount);
- if (PopCount != 0U)
- break;
+ if (!Region->isPopulatingFreeList) {
+ Region->isPopulatingFreeList = true;
+ PopulateFreeList = true;
+ }
+ }
- if (!Region->isPopulatingFreeList)
- continue;
+ if (PopulateFreeList) {
+ ScopedLock ML(Region->MMLock);
- // Now the freelist is empty and someone's doing the refillment. We will
- // wait until anyone refills the freelist or someone finishes doing
- // `populateFreeListAndPopBlocks()`. The refillment can be done by
- // `populateFreeListAndPopBlocks()`, `pushBlocks()`,
- // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
- Region->FLLockCV.wait(Region->FLLock);
-
- PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray,
- MaxBlockCount);
- if (PopCount != 0U)
- break;
+ const bool RegionIsExhausted = Region->Exhausted;
+ if (!RegionIsExhausted) {
+ PopCount = populateFreeListAndPopBlocks(SizeClassAllocator, ClassId,
+ Region, ToArray, MaxBlockCount);
+ }
+ ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
+
+ {
+ // Before reacquiring the `FLLock`, the freelist may be used up again
+ // and some threads are waiting for the freelist refillment by the
+ // current thread. It's important to set
+ // `Region->isPopulatingFreeList` to false so the threads about to
+ // sleep will notice the status change.
+ ScopedLock FL(Region->FLLock);
+ Region->isPopulatingFreeList = false;
+ Region->FLLockCV.notifyAll(Region->FLLock);
+ }
+
+ break;
 }
- return PopCount;
+ // Here, there are two preconditions to be met before waiting,
+ // 1. The freelist is empty.
+ // 2. Region->isPopulatingFreeList == true, i.e., someone is still doing
+ // `populateFreeListAndPopBlocks()`.
+ //
+ // Note that there is a chance that the freelist is empty but
+ // Region->isPopulatingFreeList == false because all the newly populated
+ // blocks were used up right after the refillment. Therefore, we have to
+ // check if someone is still populating the freelist.
+ ScopedLock FL(Region->FLLock);
+ PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray,
+ MaxBlockCount);
+ if (PopCount != 0U)
+ break;
+
+ if (!Region->isPopulatingFreeList)
+ continue;
+
+ // Now the freelist is empty and someone's doing the refillment. We will
+ // wait until anyone refills the freelist or someone finishes doing
+ // `populateFreeListAndPopBlocks()`. The refillment can be done by
+ // `populateFreeListAndPopBlocks()`, `pushBlocks()`,
+ // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
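// An illustrative reduction of the handshake above to standard C++ primitives;
// this is not part of the patch, and popOnce()/refill() are placeholders for
// popBlocksImpl() and populateFreeListAndPopBlocks().
#include <condition_variable>
#include <mutex>

struct FreeListState {
  std::mutex FLLock;                // stands in for Region->FLLock
  std::condition_variable FLLockCV; // stands in for Region->FLLockCV
  bool IsPopulating = false;        // stands in for Region->isPopulatingFreeList
};

template <class PopOnce, class Refill>
int popWithCVSketch(FreeListState &S, PopOnce popOnce, Refill refill) {
  while (true) {
    {
      std::unique_lock<std::mutex> L(S.FLLock);
      if (int N = popOnce())
        return N;
      if (S.IsPopulating) {
        // Preconditions for waiting: freelist empty and someone else is
        // refilling. Re-check after wake-up, the refilled blocks may
        // already be gone.
        S.FLLockCV.wait(L);
        continue;
      }
      S.IsPopulating = true; // this thread becomes the only refiller
    }
    const int N = refill(); // done outside FLLock (under MMLock in scudo)
    {
      std::lock_guard<std::mutex> L(S.FLLock);
      S.IsPopulating = false; // threads about to sleep see the change
    }
    S.FLLockCV.notify_all();
    return N; // the refiller returns whatever it managed to pop (may be 0)
  }
}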
+ Region->FLLockCV.wait(Region->FLLock); + + PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, ToArray, + MaxBlockCount); + if (PopCount != 0U) + break; } - u16 popBlocksImpl(SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, - RegionInfo *Region, CompactPtrT *ToArray, - const u16 MaxBlockCount) REQUIRES(Region->FLLock) { - if (Region->FreeListInfo.BlockList.empty()) - return 0U; + return PopCount; +} - SinglyLinkedList &Batches = - Region->FreeListInfo.BlockList.front()->Batches; +template +u16 SizeClassAllocator64::popBlocksImpl( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, RegionInfo *Region, + CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Region->FLLock) { + if (Region->FreeListInfo.BlockList.empty()) + return 0U; + + SinglyLinkedList &Batches = + Region->FreeListInfo.BlockList.front()->Batches; + + if (Batches.empty()) { + DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + Region->FreeListInfo.BlockList.pop_front(); + + // Block used by `BatchGroup` is from BatchClassId. Turn the block into + // `Batch` with single block. + BatchT *TB = reinterpret_cast(BG); + ToArray[0] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(TB)); + Region->FreeListInfo.PoppedBlocks += 1; + return 1U; + } + + // So far, instead of always filling blocks to `MaxBlockCount`, we only + // examine single `Batch` to minimize the time spent in the primary + // allocator. Besides, the sizes of `Batch` and + // `SizeClassAllocatorT::getMaxCached()` may also impact the time spent on + // accessing the primary allocator. + // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` + // blocks and/or adjust the size of `Batch` according to + // `SizeClassAllocatorT::getMaxCached()`. + BatchT *B = Batches.front(); + DCHECK_NE(B, nullptr); + DCHECK_GT(B->getCount(), 0U); + + // BachClassId should always take all blocks in the Batch. Read the + // comment in `pushBatchClassBlocks()` for more details. + const u16 PopCount = ClassId == SizeClassMap::BatchClassId + ? B->getCount() + : Min(MaxBlockCount, B->getCount()); + B->moveNToArray(ToArray, PopCount); + + // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be + // done without holding `FLLock`. + if (B->empty()) { + Batches.pop_front(); + // `Batch` of BatchClassId is self-contained, no need to + // deallocate. Read the comment in `pushBatchClassBlocks()` for more + // details. + if (ClassId != SizeClassMap::BatchClassId) + SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, B); if (Batches.empty()) { - DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); Region->FreeListInfo.BlockList.pop_front(); - // Block used by `BatchGroup` is from BatchClassId. Turn the block into - // `Batch` with single block. - BatchT *TB = reinterpret_cast(BG); - ToArray[0] = - compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(TB)); - Region->FreeListInfo.PoppedBlocks += 1; - return 1U; - } - - // So far, instead of always filling blocks to `MaxBlockCount`, we only - // examine single `Batch` to minimize the time spent in the primary - // allocator. Besides, the sizes of `Batch` and - // `SizeClassAllocatorT::getMaxCached()` may also impact the time spent on - // accessing the primary allocator. - // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount` - // blocks and/or adjust the size of `Batch` according to - // `SizeClassAllocatorT::getMaxCached()`. 
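The popBlocksWithCV() path above relies on a single-refiller handshake: whichever thread first flips isPopulatingFreeList does the expensive populateFreeListAndPopBlocks() call, while every other thread sleeps on FLLockCV and simply retries popBlocksImpl() after each wake-up. The following is a minimal standalone sketch of that pattern using std:: primitives; the type, names, and refill contents are illustrative, not Scudo code.

#include <condition_variable>
#include <deque>
#include <mutex>

struct SketchFreeList {
  std::mutex M;                 // plays the role of Region->FLLock
  std::condition_variable CV;   // plays the role of Region->FLLockCV
  std::deque<int> Blocks;
  bool Populating = false;      // plays the role of isPopulatingFreeList

  int pop() {
    std::unique_lock<std::mutex> L(M);
    for (;;) {
      if (!Blocks.empty()) {
        int Out = Blocks.front();
        Blocks.pop_front();
        return Out;
      }
      if (!Populating) {
        Populating = true;      // this thread becomes the single refiller
        L.unlock();             // do the slow work without blocking pushers
        std::deque<int> Fresh = {1, 2, 3}; // stands in for mapping new pages
        L.lock();
        Blocks.insert(Blocks.end(), Fresh.begin(), Fresh.end());
        Populating = false;
        CV.notify_all();        // wake everyone parked on the CV
        continue;
      }
      // Someone else is refilling; sleep and recheck after any notify.
      CV.wait(L);
    }
  }
};

The real code additionally rechecks the freelist after every wake-up because a refill can also arrive from pushBlocks()/pushBatchClassBlocks()/mergeGroupsToReleaseBack(), and it gives up instead of looping forever when the region is exhausted.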
- BatchT *B = Batches.front(); - DCHECK_NE(B, nullptr); - DCHECK_GT(B->getCount(), 0U); - - // BachClassId should always take all blocks in the Batch. Read the - // comment in `pushBatchClassBlocks()` for more details. - const u16 PopCount = ClassId == SizeClassMap::BatchClassId - ? B->getCount() - : Min(MaxBlockCount, B->getCount()); - B->moveNToArray(ToArray, PopCount); - - // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be - // done without holding `FLLock`. - if (B->empty()) { - Batches.pop_front(); - // `Batch` of BatchClassId is self-contained, no need to - // deallocate. Read the comment in `pushBatchClassBlocks()` for more - // details. + // We don't keep BatchGroup with zero blocks to avoid empty-checking + // while allocating. Note that block used for constructing BatchGroup is + // recorded as free blocks in the last element of BatchGroup::Batches. + // Which means, once we pop the last Batch, the block is + // implicitly deallocated. if (ClassId != SizeClassMap::BatchClassId) - SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, B); - - if (Batches.empty()) { - BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); - Region->FreeListInfo.BlockList.pop_front(); - - // We don't keep BatchGroup with zero blocks to avoid empty-checking - // while allocating. Note that block used for constructing BatchGroup is - // recorded as free blocks in the last element of BatchGroup::Batches. - // Which means, once we pop the last Batch, the block is - // implicitly deallocated. - if (ClassId != SizeClassMap::BatchClassId) - SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, BG); - } + SizeClassAllocator->deallocate(SizeClassMap::BatchClassId, BG); } + } - Region->FreeListInfo.PoppedBlocks += PopCount; - - return PopCount; + Region->FreeListInfo.PoppedBlocks += PopCount; + + return PopCount; +} + +template +u16 SizeClassAllocator64::populateFreeListAndPopBlocks( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, RegionInfo *Region, + CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Region->MMLock) + EXCLUDES(Region->FLLock) { + if (!Config::getEnableContiguousRegions() && + !Region->MemMapInfo.MemMap.isAllocated()) { + ReservedMemoryT ReservedMemory; + if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize, + "scudo:primary_reserve", + MAP_ALLOWNOMEM))) { + Printf("Can't reserve pages for size class %zu.\n", + getSizeByClassId(ClassId)); + return 0U; + } + initRegion(Region, ClassId, + ReservedMemory.dispatch(ReservedMemory.getBase(), + ReservedMemory.getCapacity()), + /*EnableRandomOffset=*/false); } - NOINLINE u16 populateFreeListAndPopBlocks( - SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, RegionInfo *Region, - CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Region->MMLock) - EXCLUDES(Region->FLLock) { - if (!Config::getEnableContiguousRegions() && - !Region->MemMapInfo.MemMap.isAllocated()) { - ReservedMemoryT ReservedMemory; - if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize, - "scudo:primary_reserve", - MAP_ALLOWNOMEM))) { - Printf("Can't reserve pages for size class %zu.\n", - getSizeByClassId(ClassId)); - return 0U; - } - initRegion(Region, ClassId, - ReservedMemory.dispatch(ReservedMemory.getBase(), - ReservedMemory.getCapacity()), - /*EnableRandomOffset=*/false); + DCHECK(Region->MemMapInfo.MemMap.isAllocated()); + const uptr Size = getSizeByClassId(ClassId); + const u16 MaxCount = SizeClassAllocatorT::getMaxCached(Size); + const uptr RegionBeg = Region->RegionBeg; + const uptr MappedUser = 
Region->MemMapInfo.MappedUser; + const uptr TotalUserBytes = + Region->MemMapInfo.AllocatedUser + MaxCount * Size; + // Map more space for blocks, if necessary. + if (TotalUserBytes > MappedUser) { + // Do the mmap for the user memory. + const uptr MapSize = roundUp(TotalUserBytes - MappedUser, MapSizeIncrement); + const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); + if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { + Region->Exhausted = true; + return 0U; } - DCHECK(Region->MemMapInfo.MemMap.isAllocated()); - const uptr Size = getSizeByClassId(ClassId); - const u16 MaxCount = SizeClassAllocatorT::getMaxCached(Size); - const uptr RegionBeg = Region->RegionBeg; - const uptr MappedUser = Region->MemMapInfo.MappedUser; - const uptr TotalUserBytes = - Region->MemMapInfo.AllocatedUser + MaxCount * Size; - // Map more space for blocks, if necessary. - if (TotalUserBytes > MappedUser) { - // Do the mmap for the user memory. - const uptr MapSize = - roundUp(TotalUserBytes - MappedUser, MapSizeIncrement); - const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); - if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { - Region->Exhausted = true; - return 0U; - } + if (UNLIKELY(!Region->MemMapInfo.MemMap.remap( + RegionBeg + MappedUser, MapSize, "scudo:primary", + MAP_ALLOWNOMEM | MAP_RESIZABLE | + (useMemoryTagging(Options.load()) ? MAP_MEMTAG : 0)))) { + return 0U; + } + Region->MemMapInfo.MappedUser += MapSize; + SizeClassAllocator->getStats().add(StatMapped, MapSize); + } - if (UNLIKELY(!Region->MemMapInfo.MemMap.remap( - RegionBeg + MappedUser, MapSize, "scudo:primary", - MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging(Options.load()) ? MAP_MEMTAG - : 0)))) { - return 0U; + const u32 NumberOfBlocks = + Min(MaxNumBatches * MaxCount, + static_cast((Region->MemMapInfo.MappedUser - + Region->MemMapInfo.AllocatedUser) / + Size)); + DCHECK_GT(NumberOfBlocks, 0); + + constexpr u32 ShuffleArraySize = MaxNumBatches * MaxNumBlocksInBatch; + CompactPtrT ShuffleArray[ShuffleArraySize]; + DCHECK_LE(NumberOfBlocks, ShuffleArraySize); + + const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); + uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser; + for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) + ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); + + ScopedLock L(Region->FLLock); + + if (ClassId != SizeClassMap::BatchClassId) { + u32 N = 1; + uptr CurGroup = compactPtrGroup(ShuffleArray[0]); + for (u32 I = 1; I < NumberOfBlocks; I++) { + if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) { + shuffle(ShuffleArray + I - N, N, &Region->RandState); + pushBlocksImpl(SizeClassAllocator, ClassId, Region, + ShuffleArray + I - N, N, + /*SameGroup=*/true); + N = 1; + CurGroup = compactPtrGroup(ShuffleArray[I]); + } else { + ++N; } - Region->MemMapInfo.MappedUser += MapSize; - SizeClassAllocator->getStats().add(StatMapped, MapSize); } - const u32 NumberOfBlocks = - Min(MaxNumBatches * MaxCount, - static_cast((Region->MemMapInfo.MappedUser - - Region->MemMapInfo.AllocatedUser) / - Size)); - DCHECK_GT(NumberOfBlocks, 0); + shuffle(ShuffleArray + NumberOfBlocks - N, N, &Region->RandState); + pushBlocksImpl(SizeClassAllocator, ClassId, Region, + &ShuffleArray[NumberOfBlocks - N], N, + /*SameGroup=*/true); + } else { + pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks); + } - constexpr u32 ShuffleArraySize = MaxNumBatches * MaxNumBlocksInBatch; - CompactPtrT ShuffleArray[ShuffleArraySize]; - DCHECK_LE(NumberOfBlocks, 
ShuffleArraySize); + const u16 PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, + ToArray, MaxBlockCount); + DCHECK_NE(PopCount, 0U); - const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); - uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser; - for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) - ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); + // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record + // the requests from `PushBlocks` and `PopBatch` which are external + // interfaces. `populateFreeListAndPopBlocks` is the internal interface so + // we should set the values back to avoid incorrectly setting the stats. + Region->FreeListInfo.PushedBlocks -= NumberOfBlocks; - ScopedLock L(Region->FLLock); + const uptr AllocatedUser = Size * NumberOfBlocks; + SizeClassAllocator->getStats().add(StatFree, AllocatedUser); + Region->MemMapInfo.AllocatedUser += AllocatedUser; - if (ClassId != SizeClassMap::BatchClassId) { - u32 N = 1; - uptr CurGroup = compactPtrGroup(ShuffleArray[0]); - for (u32 I = 1; I < NumberOfBlocks; I++) { - if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) { - shuffle(ShuffleArray + I - N, N, &Region->RandState); - pushBlocksImpl(SizeClassAllocator, ClassId, Region, - ShuffleArray + I - N, N, - /*SameGroup=*/true); - N = 1; - CurGroup = compactPtrGroup(ShuffleArray[I]); - } else { - ++N; - } - } + return PopCount; +} - shuffle(ShuffleArray + NumberOfBlocks - N, N, &Region->RandState); - pushBlocksImpl(SizeClassAllocator, ClassId, Region, - &ShuffleArray[NumberOfBlocks - N], N, - /*SameGroup=*/true); - } else { - pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks); - } +template +void SizeClassAllocator64::pushBlocks( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, CompactPtrT *Array, + u32 Size) { + DCHECK_LT(ClassId, NumClasses); + DCHECK_GT(Size, 0); - const u16 PopCount = popBlocksImpl(SizeClassAllocator, ClassId, Region, - ToArray, MaxBlockCount); - DCHECK_NE(PopCount, 0U); - - // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record - // the requests from `PushBlocks` and `PopBatch` which are external - // interfaces. `populateFreeListAndPopBlocks` is the internal interface so - // we should set the values back to avoid incorrectly setting the stats. - Region->FreeListInfo.PushedBlocks -= NumberOfBlocks; + RegionInfo *Region = getRegionInfo(ClassId); + if (ClassId == SizeClassMap::BatchClassId) { + ScopedLock L(Region->FLLock); + pushBatchClassBlocks(Region, Array, Size); + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); + return; + } - const uptr AllocatedUser = Size * NumberOfBlocks; - SizeClassAllocator->getStats().add(StatFree, AllocatedUser); - Region->MemMapInfo.AllocatedUser += AllocatedUser; + // TODO(chiahungduan): Consider not doing grouping if the group size is not + // greater than the block size with a certain scale. - return PopCount; + bool SameGroup = true; + if (GroupSizeLog < RegionSizeLog) { + // Sort the blocks so that blocks belonging to the same group can be + // pushed together. 
+ for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) + SameGroup = false; + CompactPtrT Cur = Array[I]; + u32 J = I; + while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { + Array[J] = Array[J - 1]; + --J; + } + Array[J] = Cur; + } } - void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region) - REQUIRES(Region->MMLock, Region->FLLock) { - if (Region->MemMapInfo.MappedUser == 0) - return; - const uptr BlockSize = getSizeByClassId(ClassId); - const uptr InUseBlocks = - Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; - const uptr BytesInFreeList = - Region->MemMapInfo.AllocatedUser - InUseBlocks * BlockSize; - uptr RegionPushedBytesDelta = 0; - if (BytesInFreeList >= - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { - RegionPushedBytesDelta = - BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; - } - const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize; - Str->append("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " - "inuse: %6zu total: %6zu releases attempted: %6zu last " - "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx " - "(0x%zx)\n", - Region->Exhausted ? "E" : " ", ClassId, - getSizeByClassId(ClassId), Region->MemMapInfo.MappedUser >> 10, - Region->FreeListInfo.PoppedBlocks, - Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks, - Region->ReleaseInfo.NumReleasesAttempted, - Region->ReleaseInfo.LastReleasedBytes >> 10, - RegionPushedBytesDelta >> 10, Region->RegionBeg, - getRegionBaseByClassId(ClassId)); + { + ScopedLock L(Region->FLLock); + pushBlocksImpl(SizeClassAllocator, ClassId, Region, Array, Size, SameGroup); + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); } +} - void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId, - ScopedString *Str) REQUIRES(Region->MMLock) { - const uptr BlockSize = getSizeByClassId(ClassId); - const uptr AllocatedUserEnd = - Region->MemMapInfo.AllocatedUser + Region->RegionBeg; +// Push the blocks to their batch group. The layout will be like, +// +// FreeListInfo.BlockList - > BG -> BG -> BG +// | | | +// v v v +// TB TB TB +// | +// v +// TB +// +// Each BlockGroup(BG) will associate with unique group id and the free blocks +// are managed by a list of Batch(TB). To reduce the time of inserting blocks, +// BGs are sorted and the input `Array` are supposed to be sorted so that we can +// get better performance of maintaining sorted property. Use `SameGroup=true` +// to indicate that all blocks in the array are from the same group then we will +// skip checking the group id of each block. 
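The layout comment above can be pictured with plain structs. This is only a rough, self-contained model of the freelist's shape (the real types are intrusive SinglyLinkedList nodes whose backing blocks come from BatchClassId); every name below is invented for illustration.

#include <cstdint>
#include <vector>

using CompactPtr = uint32_t;

struct SketchBatch {
  SketchBatch *Next = nullptr;      // Batches form a LIFO list inside a group
  std::vector<CompactPtr> Blocks;   // compact pointers of free blocks
};

struct SketchBatchGroup {
  SketchBatchGroup *Next = nullptr;
  uint64_t CompactPtrGroupBase = 0; // key the group list is sorted by
  SketchBatch *Batches = nullptr;   // front batch is the one being filled
};

struct SketchRegionFreeList {
  // Kept sorted by CompactPtrGroupBase so that a sorted input array can be
  // merged in with a single forward walk, as pushBlocksImpl() does.
  SketchBatchGroup *BlockList = nullptr;
};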
+template +void SizeClassAllocator64::pushBlocksImpl( + SizeClassAllocatorT *SizeClassAllocator, uptr ClassId, RegionInfo *Region, + CompactPtrT *Array, u32 Size, bool SameGroup) REQUIRES(Region->FLLock) { + DCHECK_NE(ClassId, SizeClassMap::BatchClassId); + DCHECK_GT(Size, 0U); + + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroupT *BG = reinterpret_cast( + SizeClassAllocator->getBatchClassBlock()); + BG->Batches.clear(); + BatchT *TB = + reinterpret_cast(SizeClassAllocator->getBatchClassBlock()); + TB->clear(); + + BG->CompactPtrGroupBase = CompactPtrGroupBase; + BG->Batches.push_front(TB); + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = MaxNumBlocksInBatch; + + return BG; + }; - SinglyLinkedList GroupsToRelease; - { - ScopedLock L(Region->FLLock); - GroupsToRelease = Region->FreeListInfo.BlockList; - Region->FreeListInfo.BlockList.clear(); + auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList &Batches = BG->Batches; + BatchT *CurBatch = Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); + u16 UnusedSlots = + static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = reinterpret_cast( + SizeClassAllocator->getBatchClassBlock()); + CurBatch->clear(); + Batches.push_front(CurBatch); + UnusedSlots = BG->MaxCachedPerBatch; + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; } + }; - FragmentationRecorder Recorder; - if (!GroupsToRelease.empty()) { - PageReleaseContext Context = - markFreeBlocks(Region, BlockSize, AllocatedUserEnd, - getCompactPtrBaseByClassId(ClassId), GroupsToRelease); - auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; - releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + Region->FreeListInfo.PushedBlocks += Size; + BatchGroupT *Cur = Region->FreeListInfo.BlockList.front(); - mergeGroupsToReleaseBack(Region, GroupsToRelease); - } + // In the following, `Cur` always points to the BatchGroup for blocks that + // will be pushed next. `Prev` is the element right before `Cur`. 
+ BatchGroupT *Prev = nullptr; - ScopedLock L(Region->FLLock); - const uptr PageSize = getPageSizeCached(); - const uptr TotalBlocks = Region->MemMapInfo.AllocatedUser / BlockSize; - const uptr InUseBlocks = - Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; - const uptr AllocatedPagesCount = - roundUp(Region->MemMapInfo.AllocatedUser, PageSize) / PageSize; - DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); - const uptr InUsePages = - AllocatedPagesCount - Recorder.getReleasedPagesCount(); - const uptr InUseBytes = InUsePages * PageSize; + while (Cur != nullptr && + compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } - uptr Integral; - uptr Fractional; - computePercentage(BlockSize * InUseBlocks, InUseBytes, &Integral, - &Fractional); - Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " - "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", - ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, - AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); + if (Cur == nullptr || compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroup(Array[0])); + if (Prev == nullptr) + Region->FreeListInfo.BlockList.push_front(Cur); + else + Region->FreeListInfo.BlockList.insert(Prev, Cur); } - void getMemoryGroupFragmentationInfoInRegion(RegionInfo *Region, uptr ClassId, - ScopedString *Str) - REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { - const uptr BlockSize = getSizeByClassId(ClassId); - const uptr AllocatedUserEnd = - Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + // All the blocks are from the same group, just push without checking group + // id. + if (SameGroup) { + for (u32 I = 0; I < Size; ++I) + DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase); - SinglyLinkedList GroupsToRelease; - { - ScopedLock L(Region->FLLock); - GroupsToRelease = Region->FreeListInfo.BlockList; - Region->FreeListInfo.BlockList.clear(); - } + InsertBlocks(Cur, Array, Size); + return; + } - constexpr uptr GroupSize = (1UL << GroupSizeLog); - constexpr uptr MaxNumGroups = RegionSize / GroupSize; + // The blocks are sorted by group id. Determine the segment of group and + // push them to their group together. 
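The pre-sorting that makes the single forward walk above possible is the small insertion sort in pushBlocks() a few hunks earlier. A standalone sketch of just that step, with groupOf() standing in for compactPtrGroup(), looks like this (illustrative, not the Scudo code):

#include <cstddef>
#include <cstdint>
#include <vector>

using CompactPtr = uint64_t;

// Stand-in for compactPtrGroup(): blocks whose compact addresses fall into
// the same 2^GroupSizeLog-sized window share a group key.
static uint64_t groupOf(CompactPtr P, uint32_t GroupSizeLog) {
  return P >> GroupSizeLog;
}

// Sort the array by group key (insertion sort, cheap for the small arrays
// pushed from caches) and report whether every block was already in one
// group, which lets the caller skip per-block group checks later.
static bool sortByGroup(std::vector<CompactPtr> &A, uint32_t GroupSizeLog) {
  bool SameGroup = true;
  for (size_t I = 1; I < A.size(); ++I) {
    if (groupOf(A[I - 1], GroupSizeLog) != groupOf(A[I], GroupSizeLog))
      SameGroup = false;
    CompactPtr Cur = A[I];
    size_t J = I;
    while (J > 0 && groupOf(Cur, GroupSizeLog) < groupOf(A[J - 1], GroupSizeLog)) {
      A[J] = A[J - 1];
      --J;
    }
    A[J] = Cur;
  }
  return SameGroup;
}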
+ u32 Count = 1; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) { + DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase); + InsertBlocks(Cur, Array + I - Count, Count); + + while (Cur != nullptr && + compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } - MemoryGroupFragmentationRecorder Recorder; - if (!GroupsToRelease.empty()) { - PageReleaseContext Context = - markFreeBlocks(Region, BlockSize, AllocatedUserEnd, - getCompactPtrBaseByClassId(ClassId), GroupsToRelease); - auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; - releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + if (Cur == nullptr || + compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroup(Array[I])); + DCHECK_NE(Prev, nullptr); + Region->FreeListInfo.BlockList.insert(Prev, Cur); + } - mergeGroupsToReleaseBack(Region, GroupsToRelease); + Count = 1; + } else { + ++Count; } + } - Str->append("MemoryGroupFragmentationInfo in Region %zu (%zu)\n", ClassId, - BlockSize); - - const uptr MaxNumGroupsInUse = - roundUp(Region->MemMapInfo.AllocatedUser, GroupSize) / GroupSize; - for (uptr I = 0; I < MaxNumGroupsInUse; ++I) { - uptr Integral; - uptr Fractional; - computePercentage(Recorder.NumPagesInOneGroup - - Recorder.getNumFreePages(I), - Recorder.NumPagesInOneGroup, &Integral, &Fractional); - Str->append("MemoryGroup #%zu (0x%zx): util: %3zu.%02zu%%\n", I, - Region->RegionBeg + I * GroupSize, Integral, Fractional); - } + InsertBlocks(Cur, Array + Size - Count, Count); +} + +template +void SizeClassAllocator64::pushBatchClassBlocks(RegionInfo *Region, + CompactPtrT *Array, + u32 Size) + REQUIRES(Region->FLLock) { + DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId)); + + // Free blocks are recorded by Batch in freelist for all + // size-classes. In addition, Batch is allocated from BatchClassId. + // In order not to use additional block to record the free blocks in + // BatchClassId, they are self-contained. I.e., A Batch records the + // block address of itself. See the figure below: + // + // Batch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... | | + // | +------+------+------+ | + // +----------------------------+ + // + // When we allocate all the free blocks in the Batch, the block used + // by Batch is also free for use. We don't need to recycle the + // Batch. Note that the correctness is maintained by the invariant, + // + // Each popBlocks() request returns the entire Batch. Returning + // part of the blocks in a Batch is invalid. + // + // This ensures that Batch won't leak the address itself while it's + // still holding other valid data. + // + // Besides, BatchGroup is also allocated from BatchClassId and has its + // address recorded in the Batch too. To maintain the correctness, + // + // The address of BatchGroup is always recorded in the last Batch + // in the freelist (also imply that the freelist should only be + // updated with push_front). Once the last Batch is popped, + // the block used by BatchGroup is also free for use. + // + // With this approach, the blocks used by BatchGroup and Batch are + // reusable and don't need additional space for them. + + Region->FreeListInfo.PushedBlocks += Size; + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + + if (BG == nullptr) { + // Construct `BatchGroup` on the last element. 
+ BG = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + --Size; + BG->Batches.clear(); + // BatchClass hasn't enabled memory group. Use `0` to indicate there's no + // memory group here. + BG->CompactPtrGroupBase = 0; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = SizeClassAllocatorT::getMaxCached( + getSizeByClassId(SizeClassMap::BatchClassId)); + + Region->FreeListInfo.BlockList.push_front(BG); } - NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, - ReleaseToOS ReleaseType = ReleaseToOS::Normal) - REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { - const uptr BlockSize = getSizeByClassId(ClassId); - uptr BytesInFreeList; - const uptr AllocatedUserEnd = - Region->MemMapInfo.AllocatedUser + Region->RegionBeg; - uptr RegionPushedBytesDelta = 0; - SinglyLinkedList GroupsToRelease; + if (UNLIKELY(Size == 0)) + return; + + // This happens under 2 cases. + // 1. just allocated a new `BatchGroup`. + // 2. Only 1 block is pushed when the freelist is empty. + if (BG->Batches.empty()) { + // Construct the `Batch` on the last element. + BatchT *TB = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + TB->clear(); + // As mentioned above, addresses of `Batch` and `BatchGroup` are + // recorded in the Batch. + TB->add(Array[Size - 1]); + TB->add(compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(BG))); + --Size; + BG->Batches.push_front(TB); + } + BatchT *CurBatch = BG->Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + u16 UnusedSlots = + static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[I])); + CurBatch->clear(); + // Self-contained + CurBatch->add(Array[I]); + ++I; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of + // BatchClassId. + BG->Batches.push_front(CurBatch); + UnusedSlots = static_cast(BG->MaxCachedPerBatch - 1); + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + const u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } +} + +template +void SizeClassAllocator64::disable() NO_THREAD_SAFETY_ANALYSIS { + // The BatchClassId must be locked last since other classes can use it. + for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { + if (static_cast(I) == SizeClassMap::BatchClassId) + continue; + getRegionInfo(static_cast(I))->MMLock.lock(); + getRegionInfo(static_cast(I))->FLLock.lock(); + } + getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock(); + getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock(); +} + +template +void SizeClassAllocator64::enable() NO_THREAD_SAFETY_ANALYSIS { + getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock(); + getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock(); + for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; + getRegionInfo(I)->FLLock.unlock(); + getRegionInfo(I)->MMLock.unlock(); + } +} + +template +template +void SizeClassAllocator64::iterateOverBlocks(F Callback) { + for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; + RegionInfo *Region = getRegionInfo(I); + // TODO: The call of `iterateOverBlocks` requires disabling + // SizeClassAllocator64. We may consider locking each region on demand + // only. 
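disable() and enable() above encode a single global lock order: every other class is locked first (highest class id down), and the BatchClassId locks are taken last because operations on any other class may need BatchClass blocks for their Batch/BatchGroup headers. A stripped-down sketch of that ordering, with one mutex per class for brevity (assumed names, not the real members):

#include <mutex>

constexpr int NumClasses = 4;
constexpr int BatchClassId = 0;
std::mutex ClassLock[NumClasses]; // stands in for each region's MMLock+FLLock

void disableAll() {
  for (int I = NumClasses - 1; I >= 0; I--) {
    if (I == BatchClassId)
      continue;
    ClassLock[I].lock();
  }
  ClassLock[BatchClassId].lock(); // BatchClassId is always acquired last
}

void enableAll() {
  ClassLock[BatchClassId].unlock(); // released first, mirroring disableAll()
  for (int I = 0; I < NumClasses; I++) {
    if (I == BatchClassId)
      continue;
    ClassLock[I].unlock();
  }
}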
+ Region->FLLock.assertHeld(); + Region->MMLock.assertHeld(); + const uptr BlockSize = getSizeByClassId(I); + const uptr From = Region->RegionBeg; + const uptr To = From + Region->MemMapInfo.AllocatedUser; + for (uptr Block = From; Block < To; Block += BlockSize) + Callback(Block); + } +} + +template +void SizeClassAllocator64::getStats(ScopedString *Str) { + // TODO(kostyak): get the RSS per region. + uptr TotalMapped = 0; + uptr PoppedBlocks = 0; + uptr PushedBlocks = 0; + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + { + ScopedLock L(Region->MMLock); + TotalMapped += Region->MemMapInfo.MappedUser; + } { ScopedLock L(Region->FLLock); + PoppedBlocks += Region->FreeListInfo.PoppedBlocks; + PushedBlocks += Region->FreeListInfo.PushedBlocks; + } + } + const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); + Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu " + "allocations; remains %zu; ReleaseToOsIntervalMs = %d\n", + TotalMapped >> 20, 0U, PoppedBlocks, PoppedBlocks - PushedBlocks, + IntervalMs >= 0 ? IntervalMs : -1); + + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L1(Region->MMLock); + ScopedLock L2(Region->FLLock); + getStats(Str, I, Region); + } +} + +template +void SizeClassAllocator64::getStats(ScopedString *Str, uptr ClassId, + RegionInfo *Region) + REQUIRES(Region->MMLock, Region->FLLock) { + if (Region->MemMapInfo.MappedUser == 0) + return; + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr InUseBlocks = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + const uptr BytesInFreeList = + Region->MemMapInfo.AllocatedUser - InUseBlocks * BlockSize; + uptr RegionPushedBytesDelta = 0; + if (BytesInFreeList >= Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + } + const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize; + Str->append( + "%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " + "inuse: %6zu total: %6zu releases attempted: %6zu last " + "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx " + "(0x%zx)\n", + Region->Exhausted ? 
"E" : " ", ClassId, getSizeByClassId(ClassId), + Region->MemMapInfo.MappedUser >> 10, Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks, + Region->ReleaseInfo.NumReleasesAttempted, + Region->ReleaseInfo.LastReleasedBytes >> 10, RegionPushedBytesDelta >> 10, + Region->RegionBeg, getRegionBaseByClassId(ClassId)); +} + +template +void SizeClassAllocator64::getFragmentationInfo(ScopedString *Str) { + Str->append( + "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", + getPageSizeCached()); + + for (uptr I = 1; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->MMLock); + getRegionFragmentationInfo(Region, I, Str); + } +} + +template +void SizeClassAllocator64::getRegionFragmentationInfo( + RegionInfo *Region, uptr ClassId, ScopedString *Str) + REQUIRES(Region->MMLock) { + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr AllocatedUserEnd = + Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + + SinglyLinkedList GroupsToRelease; + { + ScopedLock L(Region->FLLock); + GroupsToRelease = Region->FreeListInfo.BlockList; + Region->FreeListInfo.BlockList.clear(); + } - BytesInFreeList = Region->MemMapInfo.AllocatedUser - - (Region->FreeListInfo.PoppedBlocks - - Region->FreeListInfo.PushedBlocks) * - BlockSize; - if (UNLIKELY(BytesInFreeList == 0)) - return false; - - // ==================================================================== // - // 1. Check if we have enough free blocks and if it's worth doing a page - // release. - // ==================================================================== // - if (ReleaseType != ReleaseToOS::ForceAll && - !hasChanceToReleasePages(Region, BlockSize, BytesInFreeList, - ReleaseType)) { - return 0; - } + FragmentationRecorder Recorder; + if (!GroupsToRelease.empty()) { + PageReleaseContext Context = + markFreeBlocks(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId), GroupsToRelease); + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); - // Given that we will unlock the freelist for block operations, cache the - // value here so that when we are adapting the `TryReleaseThreshold` - // later, we are using the right metric. - RegionPushedBytesDelta = - BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; - - // ==================================================================== // - // 2. Determine which groups can release the pages. Use a heuristic to - // gather groups that are candidates for doing a release. - // ==================================================================== // - if (ReleaseType == ReleaseToOS::ForceAll) { - GroupsToRelease = Region->FreeListInfo.BlockList; - Region->FreeListInfo.BlockList.clear(); - } else { - GroupsToRelease = - collectGroupsToRelease(Region, BlockSize, AllocatedUserEnd, - getCompactPtrBaseByClassId(ClassId)); - } - if (GroupsToRelease.empty()) - return 0; - } + mergeGroupsToReleaseBack(Region, GroupsToRelease); + } - // The following steps contribute to the majority time spent in page - // releasing thus we increment the counter here. 
- ++Region->ReleaseInfo.NumReleasesAttempted; + ScopedLock L(Region->FLLock); + const uptr PageSize = getPageSizeCached(); + const uptr TotalBlocks = Region->MemMapInfo.AllocatedUser / BlockSize; + const uptr InUseBlocks = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + const uptr AllocatedPagesCount = + roundUp(Region->MemMapInfo.AllocatedUser, PageSize) / PageSize; + DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); + const uptr InUsePages = + AllocatedPagesCount - Recorder.getReleasedPagesCount(); + const uptr InUseBytes = InUsePages * PageSize; + + uptr Integral; + uptr Fractional; + computePercentage(BlockSize * InUseBlocks, InUseBytes, &Integral, + &Fractional); + Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " + "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", + ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, + AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); +} + +template +void SizeClassAllocator64::getMemoryGroupFragmentationInfoInRegion( + RegionInfo *Region, uptr ClassId, ScopedString *Str) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr AllocatedUserEnd = + Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + + SinglyLinkedList GroupsToRelease; + { + ScopedLock L(Region->FLLock); + GroupsToRelease = Region->FreeListInfo.BlockList; + Region->FreeListInfo.BlockList.clear(); + } - // Note that we have extracted the `GroupsToRelease` from region freelist. - // It's safe to let pushBlocks()/popBlocks() access the remaining region - // freelist. In the steps 3 and 4, we will temporarily release the FLLock - // and lock it again before step 5. + constexpr uptr GroupSize = (1UL << GroupSizeLog); + constexpr uptr MaxNumGroups = RegionSize / GroupSize; - // ==================================================================== // - // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`. - // Then we can tell which pages are in-use by querying - // `PageReleaseContext`. - // ==================================================================== // + MemoryGroupFragmentationRecorder Recorder; + if (!GroupsToRelease.empty()) { PageReleaseContext Context = markFreeBlocks(Region, BlockSize, AllocatedUserEnd, getCompactPtrBaseByClassId(ClassId), GroupsToRelease); - if (UNLIKELY(!Context.hasBlockMarked())) { - mergeGroupsToReleaseBack(Region, GroupsToRelease); - return 0; - } - - // ==================================================================== // - // 4. Release the unused physical pages back to the OS. - // ==================================================================== // - RegionReleaseRecorder Recorder(&Region->MemMapInfo.MemMap, - Region->RegionBeg, - Context.getReleaseOffset()); auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; releaseFreeMemoryToOS(Context, Recorder, SkipRegion); - if (Recorder.getReleasedBytes() > 0) { - // This is the case that we didn't hit the release threshold but it has - // been past a certain period of time. Thus we try to release some pages - // and if it does release some additional pages, it's hint that we are - // able to lower the threshold. Currently, this case happens when the - // `RegionPushedBytesDelta` is over half of the `TryReleaseThreshold`. As - // a result, we shrink the threshold to half accordingly. - // TODO(chiahungduan): Apply the same adjustment strategy to small blocks. 
- if (!isSmallBlock(BlockSize)) { - if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold && - Recorder.getReleasedBytes() > - Region->ReleaseInfo.LastReleasedBytes + - getMinReleaseAttemptSize(BlockSize)) { - Region->ReleaseInfo.TryReleaseThreshold = - Max(Region->ReleaseInfo.TryReleaseThreshold / 2, - getMinReleaseAttemptSize(BlockSize)); - } - } - - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; - Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); - } - Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); - - if (Region->ReleaseInfo.PendingPushedBytesDelta > 0) { - // Instead of increasing the threshold by the amount of - // `PendingPushedBytesDelta`, we only increase half of the amount so that - // it won't be a leap (which may lead to higher memory pressure) because - // of certain memory usage bursts which don't happen frequently. - Region->ReleaseInfo.TryReleaseThreshold += - Region->ReleaseInfo.PendingPushedBytesDelta / 2; - // This is another guard of avoiding the growth of threshold indefinitely. - // Note that we may consider to make this configurable if we have a better - // way to model this. - Region->ReleaseInfo.TryReleaseThreshold = Min( - Region->ReleaseInfo.TryReleaseThreshold, (1UL << GroupSizeLog) / 2); - Region->ReleaseInfo.PendingPushedBytesDelta = 0; - } - // ====================================================================== // - // 5. Merge the `GroupsToRelease` back to the freelist. - // ====================================================================== // mergeGroupsToReleaseBack(Region, GroupsToRelease); + } + + Str->append("MemoryGroupFragmentationInfo in Region %zu (%zu)\n", ClassId, + BlockSize); - return Recorder.getReleasedBytes(); + const uptr MaxNumGroupsInUse = + roundUp(Region->MemMapInfo.AllocatedUser, GroupSize) / GroupSize; + for (uptr I = 0; I < MaxNumGroupsInUse; ++I) { + uptr Integral; + uptr Fractional; + computePercentage(Recorder.NumPagesInOneGroup - Recorder.getNumFreePages(I), + Recorder.NumPagesInOneGroup, &Integral, &Fractional); + Str->append("MemoryGroup #%zu (0x%zx): util: %3zu.%02zu%%\n", I, + Region->RegionBeg + I * GroupSize, Integral, Fractional); + } +} + +template +void SizeClassAllocator64::getMemoryGroupFragmentationInfo( + ScopedString *Str) { + Str->append( + "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", + getPageSizeCached()); + + for (uptr I = 1; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->MMLock); + getMemoryGroupFragmentationInfoInRegion(Region, I, Str); + } +} + +template +bool SizeClassAllocator64::setOption(Option O, sptr Value) { + if (O == Option::ReleaseInterval) { + const s32 Interval = + Max(Min(static_cast(Value), Config::getMaxReleaseToOsIntervalMs()), + Config::getMinReleaseToOsIntervalMs()); + atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); + return true; + } + // Not supported by the Primary, but not an error either. + return true; +} + +template +uptr SizeClassAllocator64::tryReleaseToOS(uptr ClassId, + ReleaseToOS ReleaseType) { + RegionInfo *Region = getRegionInfo(ClassId); + // Note that the tryLock() may fail spuriously, given that it should rarely + // happen and page releasing is fine to skip, we don't take certain + // approaches to ensure one page release is done. 
+ if (Region->MMLock.tryLock()) { + uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType); + Region->MMLock.unlock(); + return BytesReleased; + } + return 0; +} + +template +uptr SizeClassAllocator64::releaseToOS(ReleaseToOS ReleaseType) { + uptr TotalReleasedBytes = 0; + for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; + RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->MMLock); + TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType); } + return TotalReleasedBytes; +} + +template +/* static */ BlockInfo SizeClassAllocator64::findNearestBlock( + const char *RegionInfoData, uptr Ptr) NO_THREAD_SAFETY_ANALYSIS { + const RegionInfo *RegionInfoArray = + reinterpret_cast(RegionInfoData); + + uptr ClassId; + uptr MinDistance = -1UL; + for (uptr I = 0; I != NumClasses; ++I) { + if (I == SizeClassMap::BatchClassId) + continue; + uptr Begin = RegionInfoArray[I].RegionBeg; + // TODO(chiahungduan): In fact, We need to lock the RegionInfo::MMLock. + // However, the RegionInfoData is passed with const qualifier and lock the + // mutex requires modifying RegionInfoData, which means we need to remove + // the const qualifier. This may lead to another undefined behavior (The + // first one is accessing `AllocatedUser` without locking. It's better to + // pass `RegionInfoData` as `void *` then we can lock the mutex properly. + uptr End = Begin + RegionInfoArray[I].MemMapInfo.AllocatedUser; + if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) + continue; + uptr RegionDistance; + if (Begin <= Ptr) { + if (Ptr < End) + RegionDistance = 0; + else + RegionDistance = Ptr - End; + } else { + RegionDistance = Begin - Ptr; + } - bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize, - uptr BytesInFreeList, ReleaseToOS ReleaseType) - REQUIRES(Region->MMLock, Region->FLLock) { - DCHECK_GE(Region->FreeListInfo.PoppedBlocks, - Region->FreeListInfo.PushedBlocks); - // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value - // so that we won't underestimate the releasable pages. For example, the - // following is the region usage, - // - // BytesInFreeListAtLastCheckpoint AllocatedUser - // v v - // |---------------------------------------> - // ^ ^ - // BytesInFreeList ReleaseThreshold - // - // In general, if we have collected enough bytes and the amount of free - // bytes meets the ReleaseThreshold, we will try to do page release. If we - // don't update `BytesInFreeListAtLastCheckpoint` when the current - // `BytesInFreeList` is smaller, we may take longer time to wait for enough - // freed blocks because we miss the bytes between - // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). 
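The checkpoint rule spelled out in the comment above boils down to two lines: the checkpoint only ever moves down, so the delta measured against it never underestimates how much has been pushed since the last release attempt. A minimal sketch with illustrative types:

#include <algorithm>
#include <cstdint>

struct ReleaseInfoSketch {
  uint64_t BytesInFreeListAtLastCheckpoint = 0;
};

uint64_t pushedBytesDelta(ReleaseInfoSketch &RI, uint64_t BytesInFreeList) {
  // Move the checkpoint down whenever the freelist shrinks below it ...
  RI.BytesInFreeListAtLastCheckpoint =
      std::min(RI.BytesInFreeListAtLastCheckpoint, BytesInFreeList);
  // ... so this difference is what gets compared against the release
  // threshold in hasChanceToReleasePages().
  return BytesInFreeList - RI.BytesInFreeListAtLastCheckpoint;
}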
- if (BytesInFreeList <= - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + if (RegionDistance < MinDistance) { + MinDistance = RegionDistance; + ClassId = I; } + } - const uptr RegionPushedBytesDelta = - BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + BlockInfo B = {}; + if (MinDistance <= 8192) { + B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; + B.RegionEnd = + B.RegionBegin + RegionInfoArray[ClassId].MemMapInfo.AllocatedUser; + B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); + B.BlockBegin = B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / + sptr(B.BlockSize) * sptr(B.BlockSize)); + while (B.BlockBegin < B.RegionBegin) + B.BlockBegin += B.BlockSize; + while (B.RegionEnd < B.BlockBegin + B.BlockSize) + B.BlockBegin -= B.BlockSize; + } + return B; +} + +template +uptr SizeClassAllocator64::releaseToOSMaybe(RegionInfo *Region, + uptr ClassId, + ReleaseToOS ReleaseType) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + const uptr BlockSize = getSizeByClassId(ClassId); + uptr BytesInFreeList; + const uptr AllocatedUserEnd = + Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + uptr RegionPushedBytesDelta = 0; + SinglyLinkedList GroupsToRelease; + + { + ScopedLock L(Region->FLLock); - if (ReleaseType == ReleaseToOS::Normal) { - if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold / 2) - return false; + BytesInFreeList = + Region->MemMapInfo.AllocatedUser - (Region->FreeListInfo.PoppedBlocks - + Region->FreeListInfo.PushedBlocks) * + BlockSize; + if (UNLIKELY(BytesInFreeList == 0)) + return false; - const s64 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); - if (IntervalMs < 0) - return false; + // ==================================================================== // + // 1. Check if we have enough free blocks and if it's worth doing a page + // release. + // ==================================================================== // + if (ReleaseType != ReleaseToOS::ForceAll && + !hasChanceToReleasePages(Region, BlockSize, BytesInFreeList, + ReleaseType)) { + return 0; + } - const u64 IntervalNs = static_cast(IntervalMs) * 1000000; - const u64 CurTimeNs = getMonotonicTimeFast(); - const u64 DiffSinceLastReleaseNs = - CurTimeNs - Region->ReleaseInfo.LastReleaseAtNs; - - // At here, `RegionPushedBytesDelta` is more than half of - // `TryReleaseThreshold`. If the last release happened 2 release interval - // before, we will still try to see if there's any chance to release some - // memory even it doesn't exceed the threshold. - if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold) { - // We want the threshold to have a shorter response time to the variant - // memory usage patterns. According to data collected during experiments - // (which were done with 1, 2, 4, 8 intervals), `2` strikes the better - // balance between the memory usage and number of page release attempts. - if (DiffSinceLastReleaseNs < 2 * IntervalNs) - return false; - } else if (DiffSinceLastReleaseNs < IntervalNs) { - // In this case, we are over the threshold but we just did some page - // release in the same release interval. This is a hint that we may want - // a higher threshold so that we can release more memory at once. - // `TryReleaseThreshold` will be adjusted according to how many bytes - // are not released, i.e., the `PendingPushedBytesdelta` here. - // TODO(chiahungduan): Apply the same adjustment strategy to small - // blocks. 
- if (!isSmallBlock(BlockSize)) - Region->ReleaseInfo.PendingPushedBytesDelta = RegionPushedBytesDelta; - - // Memory was returned recently. - return false; - } - } // if (ReleaseType == ReleaseToOS::Normal) + // Given that we will unlock the freelist for block operations, cache the + // value here so that when we are adapting the `TryReleaseThreshold` + // later, we are using the right metric. + RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; - return true; + // ==================================================================== // + // 2. Determine which groups can release the pages. Use a heuristic to + // gather groups that are candidates for doing a release. + // ==================================================================== // + if (ReleaseType == ReleaseToOS::ForceAll) { + GroupsToRelease = Region->FreeListInfo.BlockList; + Region->FreeListInfo.BlockList.clear(); + } else { + GroupsToRelease = + collectGroupsToRelease(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId)); + } + if (GroupsToRelease.empty()) + return 0; } - SinglyLinkedList - collectGroupsToRelease(RegionInfo *Region, const uptr BlockSize, - const uptr AllocatedUserEnd, const uptr CompactPtrBase) - REQUIRES(Region->MMLock, Region->FLLock) { - const uptr GroupSize = (1UL << GroupSizeLog); - const uptr PageSize = getPageSizeCached(); - SinglyLinkedList GroupsToRelease; - - // We are examining each group and will take the minimum distance to the - // release threshold as the next `TryReleaseThreshold`. Note that if the - // size of free blocks has reached the release threshold, the distance to - // the next release will be PageSize * SmallerBlockReleasePageDelta. See the - // comment on `SmallerBlockReleasePageDelta` for more details. - uptr MinDistToThreshold = GroupSize; - - for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), - *Prev = nullptr; - BG != nullptr;) { - // Group boundary is always GroupSize-aligned from CompactPtr base. The - // layout of memory groups is like, - // - // (CompactPtrBase) - // #1 CompactPtrGroupBase #2 CompactPtrGroupBase ... - // | | | - // v v v - // +-----------------------+-----------------------+ - // \ / \ / - // --- GroupSize --- --- GroupSize --- - // - // After decompacting the CompactPtrGroupBase, we expect the alignment - // property is held as well. - const uptr BatchGroupBase = - decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase); - DCHECK_LE(Region->RegionBeg, BatchGroupBase); - DCHECK_GE(AllocatedUserEnd, BatchGroupBase); - DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U); - // Batches are pushed in front of BG.Batches. The first one may - // not have all caches used. - const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch + - BG->Batches.front()->getCount(); - const uptr BytesInBG = NumBlocks * BlockSize; - - if (BytesInBG <= BG->BytesInBGAtLastCheckpoint) { - BG->BytesInBGAtLastCheckpoint = BytesInBG; - Prev = BG; - BG = BG->Next; - continue; - } + // The following steps contribute to the majority time spent in page + // releasing thus we increment the counter here. + ++Region->ReleaseInfo.NumReleasesAttempted; + + // Note that we have extracted the `GroupsToRelease` from region freelist. + // It's safe to let pushBlocks()/popBlocks() access the remaining region + // freelist. In the steps 3 and 4, we will temporarily release the FLLock + // and lock it again before step 5. 
+ + // ==================================================================== // + // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`. + // Then we can tell which pages are in-use by querying + // `PageReleaseContext`. + // ==================================================================== // + PageReleaseContext Context = + markFreeBlocks(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId), GroupsToRelease); + if (UNLIKELY(!Context.hasBlockMarked())) { + mergeGroupsToReleaseBack(Region, GroupsToRelease); + return 0; + } - const uptr PushedBytesDelta = BytesInBG - BG->BytesInBGAtLastCheckpoint; - if (PushedBytesDelta < getMinReleaseAttemptSize(BlockSize)) { - Prev = BG; - BG = BG->Next; - continue; + // ==================================================================== // + // 4. Release the unused physical pages back to the OS. + // ==================================================================== // + RegionReleaseRecorder Recorder(&Region->MemMapInfo.MemMap, + Region->RegionBeg, + Context.getReleaseOffset()); + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + if (Recorder.getReleasedBytes() > 0) { + // This is the case that we didn't hit the release threshold but it has + // been past a certain period of time. Thus we try to release some pages + // and if it does release some additional pages, it's hint that we are + // able to lower the threshold. Currently, this case happens when the + // `RegionPushedBytesDelta` is over half of the `TryReleaseThreshold`. As + // a result, we shrink the threshold to half accordingly. + // TODO(chiahungduan): Apply the same adjustment strategy to small blocks. + if (!isSmallBlock(BlockSize)) { + if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold && + Recorder.getReleasedBytes() > + Region->ReleaseInfo.LastReleasedBytes + + getMinReleaseAttemptSize(BlockSize)) { + Region->ReleaseInfo.TryReleaseThreshold = + Max(Region->ReleaseInfo.TryReleaseThreshold / 2, + getMinReleaseAttemptSize(BlockSize)); } + } - // Given the randomness property, we try to release the pages only if the - // bytes used by free blocks exceed certain proportion of group size. Note - // that this heuristic only applies when all the spaces in a BatchGroup - // are allocated. - if (isSmallBlock(BlockSize)) { - const uptr BatchGroupEnd = BatchGroupBase + GroupSize; - const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd - ? GroupSize - : AllocatedUserEnd - BatchGroupBase; - const uptr ReleaseThreshold = - (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U; - const bool HighDensity = BytesInBG >= ReleaseThreshold; - const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize); - // If all blocks in the group are released, we will do range marking - // which is fast. Otherwise, we will wait until we have accumulated - // a certain amount of free memory. - const bool ReachReleaseDelta = - MayHaveReleasedAll - ? 
true - : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta; - - if (!HighDensity) { - DCHECK_LE(BytesInBG, ReleaseThreshold); - // The following is the usage of a memroy group, - // - // BytesInBG ReleaseThreshold - // / \ v - // +---+---------------------------+-----+ - // | | | | | - // +---+---------------------------+-----+ - // \ / ^ - // PushedBytesDelta GroupEnd - MinDistToThreshold = - Min(MinDistToThreshold, - ReleaseThreshold - BytesInBG + PushedBytesDelta); - } else { - // If it reaches high density at this round, the next time we will try - // to release is based on SmallerBlockReleasePageDelta - MinDistToThreshold = - Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta); - } + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + } + Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); + + if (Region->ReleaseInfo.PendingPushedBytesDelta > 0) { + // Instead of increasing the threshold by the amount of + // `PendingPushedBytesDelta`, we only increase half of the amount so that + // it won't be a leap (which may lead to higher memory pressure) because + // of certain memory usage bursts which don't happen frequently. + Region->ReleaseInfo.TryReleaseThreshold += + Region->ReleaseInfo.PendingPushedBytesDelta / 2; + // This is another guard of avoiding the growth of threshold indefinitely. + // Note that we may consider to make this configurable if we have a better + // way to model this. + Region->ReleaseInfo.TryReleaseThreshold = Min( + Region->ReleaseInfo.TryReleaseThreshold, (1UL << GroupSizeLog) / 2); + Region->ReleaseInfo.PendingPushedBytesDelta = 0; + } - if (!HighDensity || !ReachReleaseDelta) { - Prev = BG; - BG = BG->Next; - continue; - } - } + // ====================================================================== // + // 5. Merge the `GroupsToRelease` back to the freelist. + // ====================================================================== // + mergeGroupsToReleaseBack(Region, GroupsToRelease); + + return Recorder.getReleasedBytes(); +} + +template +bool SizeClassAllocator64::hasChanceToReleasePages( + RegionInfo *Region, uptr BlockSize, uptr BytesInFreeList, + ReleaseToOS ReleaseType) REQUIRES(Region->MMLock, Region->FLLock) { + DCHECK_GE(Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks); + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). + if (BytesInFreeList <= Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + } - // If `BG` is the first BatchGroupT in the list, we only need to advance - // `BG` and call FreeListInfo.BlockList::pop_front(). No update is needed - // for `Prev`. 
- // - // (BG) (BG->Next) - // Prev Cur BG - // | | | - // v v v - // nil +--+ +--+ - // |X | -> | | -> ... - // +--+ +--+ - // - // Otherwise, `Prev` will be used to extract the `Cur` from the - // `FreeListInfo.BlockList`. - // - // (BG) (BG->Next) - // Prev Cur BG - // | | | - // v v v - // +--+ +--+ +--+ - // | | -> |X | -> | | -> ... - // +--+ +--+ +--+ - // - // After FreeListInfo.BlockList::extract(), - // - // Prev Cur BG - // | | | - // v v v - // +--+ +--+ +--+ - // | |-+ |X | +->| | -> ... - // +--+ | +--+ | +--+ - // +--------+ - // - // Note that we need to advance before pushing this BatchGroup to - // GroupsToRelease because it's a destructive operation. - - BatchGroupT *Cur = BG; + const uptr RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + + if (ReleaseType == ReleaseToOS::Normal) { + if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold / 2) + return false; + + const s64 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); + if (IntervalMs < 0) + return false; + + const u64 IntervalNs = static_cast(IntervalMs) * 1000000; + const u64 CurTimeNs = getMonotonicTimeFast(); + const u64 DiffSinceLastReleaseNs = + CurTimeNs - Region->ReleaseInfo.LastReleaseAtNs; + + // At here, `RegionPushedBytesDelta` is more than half of + // `TryReleaseThreshold`. If the last release happened 2 release interval + // before, we will still try to see if there's any chance to release some + // memory even it doesn't exceed the threshold. + if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold) { + // We want the threshold to have a shorter response time to the variant + // memory usage patterns. According to data collected during experiments + // (which were done with 1, 2, 4, 8 intervals), `2` strikes the better + // balance between the memory usage and number of page release attempts. + if (DiffSinceLastReleaseNs < 2 * IntervalNs) + return false; + } else if (DiffSinceLastReleaseNs < IntervalNs) { + // In this case, we are over the threshold but we just did some page + // release in the same release interval. This is a hint that we may want + // a higher threshold so that we can release more memory at once. + // `TryReleaseThreshold` will be adjusted according to how many bytes + // are not released, i.e., the `PendingPushedBytesdelta` here. + // TODO(chiahungduan): Apply the same adjustment strategy to small + // blocks. + if (!isSmallBlock(BlockSize)) + Region->ReleaseInfo.PendingPushedBytesDelta = RegionPushedBytesDelta; + + // Memory was returned recently. + return false; + } + } // if (ReleaseType == ReleaseToOS::Normal) + + return true; +} + +template +SinglyLinkedList::BatchGroupT> +SizeClassAllocator64::collectGroupsToRelease( + RegionInfo *Region, const uptr BlockSize, const uptr AllocatedUserEnd, + const uptr CompactPtrBase) REQUIRES(Region->MMLock, Region->FLLock) { + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr PageSize = getPageSizeCached(); + SinglyLinkedList GroupsToRelease; + + // We are examining each group and will take the minimum distance to the + // release threshold as the next `TryReleaseThreshold`. Note that if the + // size of free blocks has reached the release threshold, the distance to + // the next release will be PageSize * SmallerBlockReleasePageDelta. See the + // comment on `SmallerBlockReleasePageDelta` for more details. 
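To make the small-block density check in collectGroupsToRelease() concrete, take made-up values of BlockSize = 64 and a fully allocated group of 256 KiB: the threshold is 256 KiB * (100 - 1 - 64 / 16) / 100 = 256 KiB * 95 / 100, roughly 243 KiB, so the group only counts as high density once about 95% of its bytes sit on the freelist. Alternatively, once the number of free blocks reaches GroupSize / BlockSize = 4096, the group may have been freed in full, and the cheaper range marking is attempted right away instead of waiting for more pushed bytes.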
+ uptr MinDistToThreshold = GroupSize; + + for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), + *Prev = nullptr; + BG != nullptr;) { + // Group boundary is always GroupSize-aligned from CompactPtr base. The + // layout of memory groups is like, + // + // (CompactPtrBase) + // #1 CompactPtrGroupBase #2 CompactPtrGroupBase ... + // | | | + // v v v + // +-----------------------+-----------------------+ + // \ / \ / + // --- GroupSize --- --- GroupSize --- + // + // After decompacting the CompactPtrGroupBase, we expect the alignment + // property is held as well. + const uptr BatchGroupBase = + decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase); + DCHECK_LE(Region->RegionBeg, BatchGroupBase); + DCHECK_GE(AllocatedUserEnd, BatchGroupBase); + DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U); + // Batches are pushed in front of BG.Batches. The first one may + // not have all caches used. + const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch + + BG->Batches.front()->getCount(); + const uptr BytesInBG = NumBlocks * BlockSize; + + if (BytesInBG <= BG->BytesInBGAtLastCheckpoint) { + BG->BytesInBGAtLastCheckpoint = BytesInBG; + Prev = BG; BG = BG->Next; - - // Ideally, we may want to update this only after successful release. - // However, for smaller blocks, each block marking is a costly operation. - // Therefore, we update it earlier. - // TODO: Consider updating this after releasing pages if `ReleaseRecorder` - // can tell the released bytes in each group. - Cur->BytesInBGAtLastCheckpoint = BytesInBG; - - if (Prev != nullptr) - Region->FreeListInfo.BlockList.extract(Prev, Cur); - else - Region->FreeListInfo.BlockList.pop_front(); - GroupsToRelease.push_back(Cur); + continue; } - // Only small blocks have the adaptive `TryReleaseThreshold`. - if (isSmallBlock(BlockSize)) { - // If the MinDistToThreshold is not updated, that means each memory group - // may have only pushed less than a page size. In that case, just set it - // back to normal. - if (MinDistToThreshold == GroupSize) - MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta; - Region->ReleaseInfo.TryReleaseThreshold = MinDistToThreshold; + const uptr PushedBytesDelta = BytesInBG - BG->BytesInBGAtLastCheckpoint; + if (PushedBytesDelta < getMinReleaseAttemptSize(BlockSize)) { + Prev = BG; + BG = BG->Next; + continue; } - return GroupsToRelease; - } - - PageReleaseContext - markFreeBlocks(RegionInfo *Region, const uptr BlockSize, - const uptr AllocatedUserEnd, const uptr CompactPtrBase, - SinglyLinkedList &GroupsToRelease) - REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { - const uptr GroupSize = (1UL << GroupSizeLog); - auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) { - return decompactPtrInternal(CompactPtrBase, CompactPtr); - }; - - const uptr ReleaseBase = decompactGroupBase( - CompactPtrBase, GroupsToRelease.front()->CompactPtrGroupBase); - const uptr LastGroupEnd = - Min(decompactGroupBase(CompactPtrBase, - GroupsToRelease.back()->CompactPtrGroupBase) + - GroupSize, - AllocatedUserEnd); - // The last block may straddle the group boundary. Rounding up to BlockSize - // to get the exact range. 
- const uptr ReleaseEnd = - roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) + - Region->RegionBeg; - const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase; - const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg; - - PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, - ReleaseRangeSize, ReleaseOffset); - // We may not be able to do the page release in a rare case that we may - // fail on PageMap allocation. - if (UNLIKELY(!Context.ensurePageMapAllocated())) - return Context; - - for (BatchGroupT &BG : GroupsToRelease) { - const uptr BatchGroupBase = - decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase); + // Given the randomness property, we try to release the pages only if the + // bytes used by free blocks exceed certain proportion of group size. Note + // that this heuristic only applies when all the spaces in a BatchGroup + // are allocated. + if (isSmallBlock(BlockSize)) { const uptr BatchGroupEnd = BatchGroupBase + GroupSize; const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd ? GroupSize : AllocatedUserEnd - BatchGroupBase; - const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize; - const bool MayContainLastBlockInRegion = - BatchGroupUsedEnd == AllocatedUserEnd; - const bool BlockAlignedWithUsedEnd = - (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0; - - uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize; - if (!BlockAlignedWithUsedEnd) - ++MaxContainedBlocks; - - const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + - BG.Batches.front()->getCount(); - - if (NumBlocks == MaxContainedBlocks) { - for (const auto &It : BG.Batches) { - if (&It != BG.Batches.front()) - DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch); - for (u16 I = 0; I < It.getCount(); ++I) - DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase); - } - - Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd, - Region->RegionBeg, /*RegionIndex=*/0, - Region->MemMapInfo.AllocatedUser); + const uptr ReleaseThreshold = + (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U; + const bool HighDensity = BytesInBG >= ReleaseThreshold; + const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize); + // If all blocks in the group are released, we will do range marking + // which is fast. Otherwise, we will wait until we have accumulated + // a certain amount of free memory. + const bool ReachReleaseDelta = + MayHaveReleasedAll + ? true + : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta; + + if (!HighDensity) { + DCHECK_LE(BytesInBG, ReleaseThreshold); + // The following is the usage of a memroy group, + // + // BytesInBG ReleaseThreshold + // / \ v + // +---+---------------------------+-----+ + // | | | | | + // +---+---------------------------+-----+ + // \ / ^ + // PushedBytesDelta GroupEnd + MinDistToThreshold = + Min(MinDistToThreshold, + ReleaseThreshold - BytesInBG + PushedBytesDelta); } else { - DCHECK_LT(NumBlocks, MaxContainedBlocks); - // Note that we don't always visit blocks in each BatchGroup so that we - // may miss the chance of releasing certain pages that cross - // BatchGroups. 
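
For small blocks, the hunk above gates releasing on a density test: the bytes sitting in the group's freelist must cover roughly (99 - BlockSize/16) percent of the allocated part of the group, unless the whole group may already be free (in which case cheap range marking applies). The following standalone sketch restates that decision; the helper name and parameter list are illustrative, not this patch's API.

  #include <cstdint>

  using uptr = uintptr_t;

  struct GroupDensityDecision {
    bool HighDensity;
    bool ReachReleaseDelta;
    uptr DistToThreshold; // this group's contribution to MinDistToThreshold
  };

  GroupDensityDecision classifyGroup(uptr AllocatedGroupSize, uptr GroupSize,
                                     uptr BlockSize, uptr NumBlocks,
                                     uptr BytesInBG, uptr PushedBytesDelta,
                                     uptr PageSize,
                                     uptr SmallerBlockReleasePageDelta) {
    // Free bytes must cover (100 - 1 - BlockSize/16)% of the allocated part
    // of the group before a release attempt looks profitable.
    const uptr ReleaseThreshold =
        (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U;
    const bool HighDensity = BytesInBG >= ReleaseThreshold;
    // If every block of the group could be free, range marking is cheap, so
    // the accumulation requirement is waived.
    const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize);
    const bool ReachReleaseDelta =
        MayHaveReleasedAll ||
        PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta;
    // Distance to the next attempt: bytes still missing when below the
    // threshold, or the small-block page delta once density is reached.
    const uptr Dist = HighDensity
                          ? PageSize * SmallerBlockReleasePageDelta
                          : ReleaseThreshold - BytesInBG + PushedBytesDelta;
    return {HighDensity, ReachReleaseDelta, Dist};
  }

As a worked example under these assumed parameters, with BlockSize = 32 and a fully allocated 256 KiB group, ReleaseThreshold = 262144 * 97 / 100, about 248 KiB, so nearly all blocks in the group must be free before it counts as high density.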
- Context.markFreeBlocksInRegion( - BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0, - Region->MemMapInfo.AllocatedUser, MayContainLastBlockInRegion); + // If it reaches high density at this round, the next time we will try + // to release is based on SmallerBlockReleasePageDelta + MinDistToThreshold = + Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta); + } + + if (!HighDensity || !ReachReleaseDelta) { + Prev = BG; + BG = BG->Next; + continue; } } - DCHECK(Context.hasBlockMarked()); + // If `BG` is the first BatchGroupT in the list, we only need to advance + // `BG` and call FreeListInfo.BlockList::pop_front(). No update is needed + // for `Prev`. + // + // (BG) (BG->Next) + // Prev Cur BG + // | | | + // v v v + // nil +--+ +--+ + // |X | -> | | -> ... + // +--+ +--+ + // + // Otherwise, `Prev` will be used to extract the `Cur` from the + // `FreeListInfo.BlockList`. + // + // (BG) (BG->Next) + // Prev Cur BG + // | | | + // v v v + // +--+ +--+ +--+ + // | | -> |X | -> | | -> ... + // +--+ +--+ +--+ + // + // After FreeListInfo.BlockList::extract(), + // + // Prev Cur BG + // | | | + // v v v + // +--+ +--+ +--+ + // | |-+ |X | +->| | -> ... + // +--+ | +--+ | +--+ + // +--------+ + // + // Note that we need to advance before pushing this BatchGroup to + // GroupsToRelease because it's a destructive operation. + + BatchGroupT *Cur = BG; + BG = BG->Next; + + // Ideally, we may want to update this only after successful release. + // However, for smaller blocks, each block marking is a costly operation. + // Therefore, we update it earlier. + // TODO: Consider updating this after releasing pages if `ReleaseRecorder` + // can tell the released bytes in each group. + Cur->BytesInBGAtLastCheckpoint = BytesInBG; + + if (Prev != nullptr) + Region->FreeListInfo.BlockList.extract(Prev, Cur); + else + Region->FreeListInfo.BlockList.pop_front(); + GroupsToRelease.push_back(Cur); + } - return Context; + // Only small blocks have the adaptive `TryReleaseThreshold`. + if (isSmallBlock(BlockSize)) { + // If the MinDistToThreshold is not updated, that means each memory group + // may have only pushed less than a page size. In that case, just set it + // back to normal. + if (MinDistToThreshold == GroupSize) + MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta; + Region->ReleaseInfo.TryReleaseThreshold = MinDistToThreshold; } - void mergeGroupsToReleaseBack(RegionInfo *Region, - SinglyLinkedList &GroupsToRelease) - REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { - ScopedLock L(Region->FLLock); + return GroupsToRelease; +} + +template +PageReleaseContext SizeClassAllocator64::markFreeBlocks( + RegionInfo *Region, const uptr BlockSize, const uptr AllocatedUserEnd, + const uptr CompactPtrBase, SinglyLinkedList &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + const uptr GroupSize = (1UL << GroupSizeLog); + auto DecompactPtr = [CompactPtrBase, this](CompactPtrT CompactPtr) { + return decompactPtrInternal(CompactPtrBase, CompactPtr); + }; - // After merging two freelists, we may have redundant `BatchGroup`s that - // need to be recycled. The number of unused `BatchGroup`s is expected to be - // small. Pick a constant which is inferred from real programs. 
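
The extraction loop diagrammed above is the classic "unlink while iterating a singly linked list" pattern: keep a Prev pointer, advance past the node before unlinking it (unlinking is destructive), and fall back to pop_front-style removal when Prev is still null. A minimal standalone sketch with a stand-in node type, not this patch's list API:

  struct Node {
    int Key;
    Node *Next = nullptr;
  };

  // Moves every node with Key >= Threshold from the list headed by Head into
  // a new list (returned), preserving relative order.
  Node *extractMatching(Node *&Head, int Threshold) {
    Node *OutHead = nullptr, *OutTail = nullptr;
    Node *Prev = nullptr;
    Node *Cur = Head;
    while (Cur != nullptr) {
      Node *Next = Cur->Next; // advance first; unlinking is destructive
      if (Cur->Key < Threshold) {
        Prev = Cur;
        Cur = Next;
        continue;
      }
      if (Prev == nullptr)
        Head = Next;       // Cur was the front of the list
      else
        Prev->Next = Next; // splice Cur out from the middle
      Cur->Next = nullptr;
      if (OutTail == nullptr)
        OutHead = Cur;
      else
        OutTail->Next = Cur;
      OutTail = Cur;
      Cur = Next;
    }
    return OutHead;
  }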
- constexpr uptr MaxUnusedSize = 8; - CompactPtrT Blocks[MaxUnusedSize]; - u32 Idx = 0; - RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId); - // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s - // when we are manipulating the freelist of `BatchClassRegion`. Instead, we - // should just push it back to the freelist when we merge two `BatchGroup`s. - // This logic hasn't been implemented because we haven't supported releasing - // pages in `BatchClassRegion`. - DCHECK_NE(BatchClassRegion, Region); - - // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note - // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are - // sorted. - for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), - *Prev = nullptr; - ;) { - if (BG == nullptr || GroupsToRelease.empty()) { - if (!GroupsToRelease.empty()) - Region->FreeListInfo.BlockList.append_back(&GroupsToRelease); - break; + const uptr ReleaseBase = decompactGroupBase( + CompactPtrBase, GroupsToRelease.front()->CompactPtrGroupBase); + const uptr LastGroupEnd = + Min(decompactGroupBase(CompactPtrBase, + GroupsToRelease.back()->CompactPtrGroupBase) + + GroupSize, + AllocatedUserEnd); + // The last block may straddle the group boundary. Rounding up to BlockSize + // to get the exact range. + const uptr ReleaseEnd = + roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) + + Region->RegionBeg; + const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase; + const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg; + + PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + ReleaseRangeSize, ReleaseOffset); + // We may not be able to do the page release in a rare case that we may + // fail on PageMap allocation. + if (UNLIKELY(!Context.ensurePageMapAllocated())) + return Context; + + for (BatchGroupT &BG : GroupsToRelease) { + const uptr BatchGroupBase = + decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase); + const uptr BatchGroupEnd = BatchGroupBase + GroupSize; + const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd + ? GroupSize + : AllocatedUserEnd - BatchGroupBase; + const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize; + const bool MayContainLastBlockInRegion = + BatchGroupUsedEnd == AllocatedUserEnd; + const bool BlockAlignedWithUsedEnd = + (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0; + + uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize; + if (!BlockAlignedWithUsedEnd) + ++MaxContainedBlocks; + + const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + + BG.Batches.front()->getCount(); + + if (NumBlocks == MaxContainedBlocks) { + for (const auto &It : BG.Batches) { + if (&It != BG.Batches.front()) + DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch); + for (u16 I = 0; I < It.getCount(); ++I) + DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase); } - DCHECK(!BG->Batches.empty()); + Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd, + Region->RegionBeg, /*RegionIndex=*/0, + Region->MemMapInfo.AllocatedUser); + } else { + DCHECK_LT(NumBlocks, MaxContainedBlocks); + // Note that we don't always visit blocks in each BatchGroup so that we + // may miss the chance of releasing certain pages that cross + // BatchGroups. 
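
Two pieces of arithmetic in the hunk above are easy to get wrong: the release range must be rounded up to a multiple of BlockSize measured from the region start (because the last block may straddle the group end), and the per-group block capacity gains one extra slot when the used end is not block-aligned relative to the region start. A sketch under simplified assumptions (roundUpTo is a stand-in for the patch's roundUpSlow):

  #include <cstdint>

  using uptr = uintptr_t;

  // Rounds X up to a multiple of Boundary; Boundary need not be a power of 2.
  inline uptr roundUpTo(uptr X, uptr Boundary) {
    return ((X + Boundary - 1) / Boundary) * Boundary;
  }

  struct ReleaseRange {
    uptr Offset; // offset of the first candidate byte, from RegionBeg
    uptr Size;   // number of bytes the page map has to cover
  };

  ReleaseRange computeReleaseRange(uptr RegionBeg, uptr ReleaseBase,
                                   uptr LastGroupEnd, uptr BlockSize) {
    const uptr ReleaseEnd =
        roundUpTo(LastGroupEnd - RegionBeg, BlockSize) + RegionBeg;
    return {ReleaseBase - RegionBeg, ReleaseEnd - ReleaseBase};
  }

  // Upper bound on the blocks a group can contain: one extra block is
  // possible when the used end is not BlockSize-aligned w.r.t. RegionBeg.
  uptr maxContainedBlocks(uptr AllocatedGroupSize, uptr BatchGroupUsedEnd,
                          uptr RegionBeg, uptr BlockSize) {
    uptr Max = AllocatedGroupSize / BlockSize;
    if ((BatchGroupUsedEnd - RegionBeg) % BlockSize != 0)
      ++Max;
    return Max;
  }

When the counted blocks equal this upper bound, the whole range can be marked as counted in one pass; otherwise each free block has to be marked individually.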
+ Context.markFreeBlocksInRegion( + BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0, + Region->MemMapInfo.AllocatedUser, MayContainLastBlockInRegion); + } + } + + DCHECK(Context.hasBlockMarked()); + + return Context; +} + +template +void SizeClassAllocator64::mergeGroupsToReleaseBack( + RegionInfo *Region, SinglyLinkedList &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + ScopedLock L(Region->FLLock); + + // After merging two freelists, we may have redundant `BatchGroup`s that + // need to be recycled. The number of unused `BatchGroup`s is expected to be + // small. Pick a constant which is inferred from real programs. + constexpr uptr MaxUnusedSize = 8; + CompactPtrT Blocks[MaxUnusedSize]; + u32 Idx = 0; + RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId); + // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s + // when we are manipulating the freelist of `BatchClassRegion`. Instead, we + // should just push it back to the freelist when we merge two `BatchGroup`s. + // This logic hasn't been implemented because we haven't supported releasing + // pages in `BatchClassRegion`. + DCHECK_NE(BatchClassRegion, Region); + + // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note + // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are + // sorted. + for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), + *Prev = nullptr; + ;) { + if (BG == nullptr || GroupsToRelease.empty()) { + if (!GroupsToRelease.empty()) + Region->FreeListInfo.BlockList.append_back(&GroupsToRelease); + break; + } - if (BG->CompactPtrGroupBase < - GroupsToRelease.front()->CompactPtrGroupBase) { - Prev = BG; - BG = BG->Next; - continue; - } + DCHECK(!BG->Batches.empty()); - BatchGroupT *Cur = GroupsToRelease.front(); - BatchT *UnusedBatch = nullptr; - GroupsToRelease.pop_front(); - - if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) { - // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while - // collecting the `GroupsToRelease`. - BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint; - const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch; - - // Note that the first Batches in both `Batches` may not be - // full and only the first Batch can have non-full blocks. Thus - // we have to merge them before appending one to another. - if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) { - BG->Batches.append_back(&Cur->Batches); + if (BG->CompactPtrGroupBase < + GroupsToRelease.front()->CompactPtrGroupBase) { + Prev = BG; + BG = BG->Next; + continue; + } + + BatchGroupT *Cur = GroupsToRelease.front(); + BatchT *UnusedBatch = nullptr; + GroupsToRelease.pop_front(); + + if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) { + // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while + // collecting the `GroupsToRelease`. + BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint; + const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch; + + // Note that the first Batches in both `Batches` may not be + // full and only the first Batch can have non-full blocks. Thus + // we have to merge them before appending one to another. + if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) { + BG->Batches.append_back(&Cur->Batches); + } else { + BatchT *NonFullBatch = Cur->Batches.front(); + Cur->Batches.pop_front(); + const u16 NonFullBatchCount = NonFullBatch->getCount(); + // The remaining Batches in `Cur` are full. 
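
The merge loop above is a splice of two already-sorted singly linked lists: nodes from the extracted list are inserted in front of the first existing node with a larger key, and the cursor into the existing list is deliberately not advanced after an insertion, because its order relative to the next incoming node has not been checked yet. A simplified standalone sketch (stand-in node type; the real code additionally coalesces groups with equal bases, which is omitted here):

  #include <cstdint>

  struct Group {
    uintptr_t GroupBase;
    Group *Next = nullptr;
  };

  // Both lists are sorted by GroupBase ascending. Splices every node of
  // ToMerge into List so the result stays sorted; equal keys end up adjacent.
  void mergeSortedBack(Group *&List, Group *ToMerge) {
    Group *Prev = nullptr;
    Group *Cur = List;
    while (ToMerge != nullptr) {
      if (Cur == nullptr || ToMerge->GroupBase < Cur->GroupBase) {
        Group *Node = ToMerge;
        ToMerge = ToMerge->Next;
        // Insert Node before Cur (push_front when Prev is null).
        Node->Next = Cur;
        if (Prev == nullptr)
          List = Node;
        else
          Prev->Next = Node;
        // Do not advance Cur: its order vs. the next incoming node is
        // still unchecked.
        Prev = Node;
      } else {
        Prev = Cur;
        Cur = Cur->Next;
      }
    }
  }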
+ BG->Batches.append_back(&Cur->Batches); + + if (BG->Batches.front()->getCount() == MaxCachedPerBatch) { + // Only 1 non-full Batch, push it to the front. + BG->Batches.push_front(NonFullBatch); } else { - BatchT *NonFullBatch = Cur->Batches.front(); - Cur->Batches.pop_front(); - const u16 NonFullBatchCount = NonFullBatch->getCount(); - // The remaining Batches in `Cur` are full. - BG->Batches.append_back(&Cur->Batches); - - if (BG->Batches.front()->getCount() == MaxCachedPerBatch) { - // Only 1 non-full Batch, push it to the front. + const u16 NumBlocksToMove = static_cast( + Min(static_cast(MaxCachedPerBatch - + BG->Batches.front()->getCount()), + NonFullBatchCount)); + BG->Batches.front()->appendFromBatch(NonFullBatch, NumBlocksToMove); + if (NonFullBatch->isEmpty()) + UnusedBatch = NonFullBatch; + else BG->Batches.push_front(NonFullBatch); - } else { - const u16 NumBlocksToMove = static_cast( - Min(static_cast(MaxCachedPerBatch - - BG->Batches.front()->getCount()), - NonFullBatchCount)); - BG->Batches.front()->appendFromBatch(NonFullBatch, NumBlocksToMove); - if (NonFullBatch->isEmpty()) - UnusedBatch = NonFullBatch; - else - BG->Batches.push_front(NonFullBatch); - } } - - const u32 NeededSlots = UnusedBatch == nullptr ? 1U : 2U; - if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) { - ScopedLock L(BatchClassRegion->FLLock); - pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); - if (conditionVariableEnabled()) - BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); - Idx = 0; - } - Blocks[Idx++] = - compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(Cur)); - if (UnusedBatch) { - Blocks[Idx++] = compactPtr(SizeClassMap::BatchClassId, - reinterpret_cast(UnusedBatch)); - } - Prev = BG; - BG = BG->Next; - continue; } - // At here, the `BG` is the first BatchGroup with CompactPtrGroupBase - // larger than the first element in `GroupsToRelease`. We need to insert - // `GroupsToRelease::front()` (which is `Cur` below) before `BG`. - // - // 1. If `Prev` is nullptr, we simply push `Cur` to the front of - // FreeListInfo.BlockList. - // 2. Otherwise, use `insert()` which inserts an element next to `Prev`. - // - // Afterwards, we don't need to advance `BG` because the order between - // `BG` and the new `GroupsToRelease::front()` hasn't been checked. - if (Prev == nullptr) - Region->FreeListInfo.BlockList.push_front(Cur); - else - Region->FreeListInfo.BlockList.insert(Prev, Cur); - DCHECK_EQ(Cur->Next, BG); - Prev = Cur; - } - - if (Idx != 0) { - ScopedLock L(BatchClassRegion->FLLock); - pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); - if (conditionVariableEnabled()) - BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); - } - - if (SCUDO_DEBUG) { - BatchGroupT *Prev = Region->FreeListInfo.BlockList.front(); - for (BatchGroupT *Cur = Prev->Next; Cur != nullptr; - Prev = Cur, Cur = Cur->Next) { - CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase); + const u32 NeededSlots = UnusedBatch == nullptr ? 
1U : 2U; + if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) { + ScopedLock L(BatchClassRegion->FLLock); + pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); + if (conditionVariableEnabled()) + BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); + Idx = 0; } + Blocks[Idx++] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(Cur)); + if (UnusedBatch) { + Blocks[Idx++] = compactPtr(SizeClassMap::BatchClassId, + reinterpret_cast(UnusedBatch)); + } + Prev = BG; + BG = BG->Next; + continue; } + // At here, the `BG` is the first BatchGroup with CompactPtrGroupBase + // larger than the first element in `GroupsToRelease`. We need to insert + // `GroupsToRelease::front()` (which is `Cur` below) before `BG`. + // + // 1. If `Prev` is nullptr, we simply push `Cur` to the front of + // FreeListInfo.BlockList. + // 2. Otherwise, use `insert()` which inserts an element next to `Prev`. + // + // Afterwards, we don't need to advance `BG` because the order between + // `BG` and the new `GroupsToRelease::front()` hasn't been checked. + if (Prev == nullptr) + Region->FreeListInfo.BlockList.push_front(Cur); + else + Region->FreeListInfo.BlockList.insert(Prev, Cur); + DCHECK_EQ(Cur->Next, BG); + Prev = Cur; + } + + if (Idx != 0) { + ScopedLock L(BatchClassRegion->FLLock); + pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); if (conditionVariableEnabled()) - Region->FLLockCV.notifyAll(Region->FLLock); + BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); } - // The minimum size of pushed blocks that we will try to release the pages in - // that size class. - uptr SmallerBlockReleasePageDelta = 0; - atomic_s32 ReleaseToOsIntervalMs = {}; - alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; -}; + if (SCUDO_DEBUG) { + BatchGroupT *Prev = Region->FreeListInfo.BlockList.front(); + for (BatchGroupT *Cur = Prev->Next; Cur != nullptr; + Prev = Cur, Cur = Cur->Next) { + CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase); + } + } + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); +} } // namespace scudo #endif // SCUDO_PRIMARY64_H_
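
The recycling logic at the end of mergeGroupsToReleaseBack() follows a common pattern: unused records are accumulated into a small fixed-size local array and flushed in batches, so the lock-protected push to the BatchClass freelist happens once per MaxUnusedSize records (plus a final flush) instead of once per record. A self-contained sketch of that pattern with hypothetical names, not the patch's types:

  #include <cstdint>
  #include <functional>
  #include <utility>

  using uptr = uintptr_t;

  template <uptr MaxUnused = 8>
  class RecycleBuffer {
  public:
    explicit RecycleBuffer(std::function<void(const uptr *, uptr)> Flush)
        : FlushFn(std::move(Flush)) {}
    ~RecycleBuffer() { flush(); } // final flush, like the `Idx != 0` path

    // Adds one record plus an optional second one (e.g. a group header and an
    // emptied batch), flushing first if they would not fit.
    void push(uptr Record, bool HasSecond = false, uptr Second = 0) {
      const uptr Needed = HasSecond ? 2u : 1u;
      if (Idx + Needed > MaxUnused)
        flush();
      Buf[Idx++] = Record;
      if (HasSecond)
        Buf[Idx++] = Second;
    }

    void flush() {
      if (Idx == 0)
        return;
      // E.g. push back to the batch-class freelist under its own lock.
      FlushFn(Buf, Idx);
      Idx = 0;
    }

  private:
    std::function<void(const uptr *, uptr)> FlushFn;
    uptr Buf[MaxUnused];
    uptr Idx = 0;
  };

Deferring the flush keeps the BatchClass region's freelist lock out of the per-group fast path while still bounding the stack space used for the pending records.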