diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h
--- a/compiler-rt/lib/scudo/standalone/allocator_config.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.h
@@ -32,7 +32,7 @@
   // 512KB regions
   typedef SizeClassAllocator32<SizeClassMap, 19U> Primary;
 #endif
-  typedef MapAllocator<> Secondary;
+  typedef MapAllocator<MapAllocatorCache<>> Secondary;
   template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive
 };
@@ -47,7 +47,7 @@
   // 512KB regions
   typedef SizeClassAllocator32<SizeClassMap, 19U> Primary;
 #endif
-  typedef MapAllocator<> Secondary;
+  typedef MapAllocator<MapAllocatorCache<>> Secondary;
   template <class A>
   using TSDRegistryT = TSDRegistrySharedT<A, 2U>; // Shared, max 2 TSDs.
 };
@@ -61,7 +61,7 @@
   // 64KB regions
   typedef SizeClassAllocator32<SizeClassMap, 16U> Primary;
 #endif
-  typedef MapAllocator<0U> Secondary;
+  typedef MapAllocator<MapAllocatorCache<4U, 1UL << 18>> Secondary;
   template <class A>
   using TSDRegistryT = TSDRegistrySharedT<A, 1U>; // Shared, only 1 TSD.
 };
@@ -70,7 +70,7 @@
 struct FuchsiaConfig {
   // 1GB Regions
   typedef SizeClassAllocator64<DefaultSizeClassMap, 30U> Primary;
-  typedef MapAllocator<0U> Secondary;
+  typedef MapAllocator<MapAllocatorNoCache> Secondary;
   template <class A>
   using TSDRegistryT = TSDRegistrySharedT<A, 8U>; // Shared, max 8 TSDs.
 };
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -141,8 +141,9 @@
         static_cast<u32>(getFlags()->quarantine_max_chunk_size);
 
     Stats.initLinkerInitialized();
-    Primary.initLinkerInitialized(getFlags()->release_to_os_interval_ms);
-    Secondary.initLinkerInitialized(&Stats);
+    const s32 ReleaseToOsIntervalMs = getFlags()->release_to_os_interval_ms;
+    Primary.initLinkerInitialized(ReleaseToOsIntervalMs);
+    Secondary.initLinkerInitialized(&Stats, ReleaseToOsIntervalMs);
 
     Quarantine.init(
         static_cast<uptr>(getFlags()->quarantine_size_kb << 10),
diff --git a/compiler-rt/lib/scudo/standalone/flags.inc b/compiler-rt/lib/scudo/standalone/flags.inc
--- a/compiler-rt/lib/scudo/standalone/flags.inc
+++ b/compiler-rt/lib/scudo/standalone/flags.inc
@@ -45,6 +45,6 @@
            "returning NULL in otherwise non-fatal error scenarios, eg: OOM, "
            "invalid allocation alignments, etc.")
 
-SCUDO_FLAG(int, release_to_os_interval_ms, 5000,
+SCUDO_FLAG(int, release_to_os_interval_ms, SCUDO_ANDROID ? 1000 : 5000,
           "Interval (in milliseconds) at which to attempt release of unused "
           "memory to the OS. Negative values disable the feature.")
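Note: with this change a platform config selects its Secondary caching behavior by type rather than by a free-list size. Below is a minimal sketch of a hypothetical custom config (`MyConfig` and its cache parameters are illustrative, not part of this patch), assuming the in-tree scudo types:

    namespace scudo {
    struct MyConfig {
      using SizeClassMap = DefaultSizeClassMap;
      // 1GB regions, as in DefaultConfig.
      typedef SizeClassAllocator64<SizeClassMap, 30U> Primary;
      // Cache up to 16 Secondary blocks of at most 1MB each (illustrative
      // numbers); MapAllocatorNoCache would opt out of caching entirely.
      typedef MapAllocator<MapAllocatorCache<16U, 1UL << 20>> Secondary;
      template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive
    };
    } // namespace scudo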
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -74,8 +74,7 @@
       Sci->RandState = getRandomU32(&Seed);
       // See comment in the 64-bit primary about releasing smaller size classes.
       Sci->CanRelease = (ReleaseToOsInterval >= 0) &&
-                        (I != SizeClassMap::BatchClassId) &&
-                        (getSizeByClassId(I) >= (PageSize / 32));
+                        (getSizeByClassId(I) >= (PageSize / 64));
     }
     ReleaseToOsIntervalMs = ReleaseToOsInterval;
   }
@@ -385,7 +384,7 @@
     if (IntervalMs < 0)
       return 0;
     if (Sci->ReleaseInfo.LastReleaseAtNs +
-            static_cast<uptr>(IntervalMs) * 1000000ULL >
+            static_cast<u64>(IntervalMs) * 1000000 >
         getMonotonicTime()) {
       return 0; // Memory was returned recently.
     }
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -87,8 +87,7 @@
       // limit is mostly arbitrary and based on empirical observations.
      // TODO(kostyak): make the lower limit a runtime option
       Region->CanRelease = (ReleaseToOsInterval >= 0) &&
-                           (I != SizeClassMap::BatchClassId) &&
-                           (getSizeByClassId(I) >= (PageSize / 32));
+                           (getSizeByClassId(I) >= (PageSize / 64));
       Region->RandState = getRandomU32(&Seed);
     }
     ReleaseToOsIntervalMs = ReleaseToOsInterval;
@@ -401,7 +400,7 @@
     if (IntervalMs < 0)
       return 0;
     if (Region->ReleaseInfo.LastReleaseAtNs +
-            static_cast<uptr>(IntervalMs) * 1000000ULL >
+            static_cast<u64>(IntervalMs) * 1000000 >
         getMonotonicTime()) {
       return 0; // Memory was returned recently.
     }
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -200,7 +200,13 @@
   if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
     // Each chunk affects one page only.
     for (const auto &It : FreeList) {
-      for (u32 I = 0; I < It.getCount(); I++) {
+      // If dealing with a TransferBatch, the first pointer of the batch will
+      // point to the batch itself, we do not want to mark this for release as
+      // the batch is in use, so skip the first entry.
+      const bool IsTransferBatch =
+          (It.getCount() != 0) &&
+          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
+      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I));
         if (P >= Base && P < End)
           Counters.inc((P - Base) >> PageSizeLog);
@@ -209,7 +215,11 @@
   } else {
     // In all other cases chunks might affect more than one page.
     for (const auto &It : FreeList) {
-      for (u32 I = 0; I < It.getCount(); I++) {
+      // See TransferBatch comment above.
+      const bool IsTransferBatch =
+          (It.getCount() != 0) &&
+          (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
+      for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I));
         if (P >= Base && P < End)
           Counters.incRange((P - Base) >> PageSizeLog,
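Note: dropping the `BatchClassId` exclusion in both primaries relies on the release.h change above: a TransferBatch sitting in a freelist stores its own address as its first pointer, so the counting loops now skip entry 0 instead of exempting the whole batch size class from release. A self-contained toy model of that detection idiom (toy types and sizes, not the actual scudo ones):

    #include <assert.h>
    #include <stdint.h>

    // Toy stand-in for scudo's TransferBatch: an array of free pointers. In
    // the batch size class, the batch's storage holds the batch itself.
    struct ToyBatch {
      static constexpr uint32_t MaxCount = 4;
      void *Pointers[MaxCount];
      uint32_t Count = 0;
      uint32_t getCount() const { return Count; }
      void *get(uint32_t I) const { return Pointers[I]; }
    };

    int main() {
      int Blocks[2];
      ToyBatch B;
      B.Pointers[B.Count++] = &B;         // first entry: the batch itself
      B.Pointers[B.Count++] = &Blocks[0]; // plus two actually-free blocks
      B.Pointers[B.Count++] = &Blocks[1];

      // The idiom from the patch: entry 0 aliasing the batch means the
      // batch object is live and must not be marked for release.
      const bool IsTransferBatch =
          (B.getCount() != 0) &&
          (reinterpret_cast<uintptr_t>(B.get(0)) ==
           reinterpret_cast<uintptr_t>(&B));
      assert(IsTransferBatch);
      for (uint32_t I = IsTransferBatch ? 1 : 0; I < B.getCount(); I++)
        assert(B.get(I) != static_cast<void *>(&B)); // only real free blocks
      return 0;
    }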
diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -48,20 +48,183 @@
 } // namespace LargeBlock
 
-template <uptr MaxFreeListSize = 32U> class MapAllocator {
+class MapAllocatorNoCache {
 public:
-  // Ensure the freelist is disabled on Fuchsia, since it doesn't support
-  // releasing Secondary blocks yet.
-  static_assert(!SCUDO_FUCHSIA || MaxFreeListSize == 0U, "");
+  void initLinkerInitialized(UNUSED s32 ReleaseToOsInterval) {}
+  void init(UNUSED s32 ReleaseToOsInterval) {}
+  bool retrieve(UNUSED uptr Size, UNUSED LargeBlock::Header **H) {
+    return false;
+  }
+  bool store(UNUSED LargeBlock::Header *H) { return false; }
+  static bool canCache(UNUSED uptr Size) { return false; }
+  void disable() {}
+  void enable() {}
+};
+
+template <uptr MaxEntriesCount = 32U, uptr MaxEntrySize = 1UL << 19>
+class MapAllocatorCache {
+public:
+  // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length
+  // arrays are an extension for some compilers.
+  // FIXME(kostyak): support (partially) the cache on Fuchsia.
+  static_assert(!SCUDO_FUCHSIA || MaxEntriesCount == 0U, "");
+
+  void initLinkerInitialized(s32 ReleaseToOsInterval) {
+    ReleaseToOsIntervalMs = ReleaseToOsInterval;
+  }
+  void init(s32 ReleaseToOsInterval) {
+    memset(this, 0, sizeof(*this));
+    initLinkerInitialized(ReleaseToOsInterval);
+  }
+
+  bool store(LargeBlock::Header *H) {
+    bool EntryCached = false;
+    bool EmptyCache = false;
+    const u64 Time = getMonotonicTime();
+    {
+      ScopedLock L(Mutex);
+      if (EntriesCount == MaxEntriesCount) {
+        if (IsFullEvents++ == 4U)
+          EmptyCache = true;
+      } else {
+        for (uptr I = 0; I < MaxEntriesCount; I++) {
+          if (Entries[I].Block)
+            continue;
+          if (I != 0)
+            Entries[I] = Entries[0];
+          Entries[0].Block = reinterpret_cast<uptr>(H);
+          Entries[0].BlockEnd = H->BlockEnd;
+          Entries[0].MapBase = H->MapBase;
+          Entries[0].MapSize = H->MapSize;
+          Entries[0].Time = Time;
+          EntriesCount++;
+          EntryCached = true;
+          break;
+        }
+      }
+    }
+    if (EmptyCache)
+      empty();
+    else if (ReleaseToOsIntervalMs >= 0)
+      releaseOlderThan(Time -
+                       static_cast<u64>(ReleaseToOsIntervalMs) * 1000000);
+    return EntryCached;
+  }
+
+  bool retrieve(uptr Size, LargeBlock::Header **H) {
+    ScopedLock L(Mutex);
+    if (EntriesCount == 0)
+      return false;
+    for (uptr I = 0; I < MaxEntriesCount; I++) {
+      if (!Entries[I].Block)
+        continue;
+      const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block;
+      if (Size > BlockSize)
+        continue;
+      if (Size < BlockSize - getPageSizeCached() * 4U)
+        continue;
+      *H = reinterpret_cast<LargeBlock::Header *>(Entries[I].Block);
+      Entries[I].Block = 0;
+      (*H)->BlockEnd = Entries[I].BlockEnd;
+      (*H)->MapBase = Entries[I].MapBase;
+      (*H)->MapSize = Entries[I].MapSize;
+      EntriesCount--;
+      return true;
+    }
+    return false;
+  }
+
+  static bool canCache(uptr Size) {
+    return MaxEntriesCount != 0U && Size <= MaxEntrySize;
+  }
+
+  void disable() { Mutex.lock(); }
+
+  void enable() { Mutex.unlock(); }
+
+private:
+  void empty() {
+    struct {
+      void *MapBase;
+      uptr MapSize;
+      MapPlatformData Data;
+    } MapInfo[MaxEntriesCount];
+    uptr N = 0;
+    {
+      ScopedLock L(Mutex);
+      for (uptr I = 0; I < MaxEntriesCount; I++) {
+        if (!Entries[I].Block)
+          continue;
+        MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase);
+        MapInfo[N].MapSize = Entries[I].MapSize;
+        MapInfo[N].Data = Entries[I].Data;
+        Entries[I].Block = 0;
+        N++;
+      }
+      EntriesCount = 0;
+      IsFullEvents = 0;
+    }
+    for (uptr I = 0; I < N; I++)
+      unmap(MapInfo[I].MapBase, MapInfo[I].MapSize, UNMAP_ALL,
+            &MapInfo[I].Data);
+  }
+
+  void releaseOlderThan(u64 Time) {
+    struct {
+      uptr Block;
+      uptr BlockSize;
+      MapPlatformData Data;
+    } BlockInfo[MaxEntriesCount];
+    uptr N = 0;
+    {
+      ScopedLock L(Mutex);
+      if (!EntriesCount)
+        return;
+      for (uptr I = 0; I < MaxEntriesCount; I++) {
+        if (!Entries[I].Block || !Entries[I].Time)
+          continue;
+        if (Entries[I].Time > Time)
+          continue;
+        BlockInfo[N].Block = Entries[I].Block;
+        BlockInfo[N].BlockSize = Entries[I].BlockEnd - Entries[I].Block;
+        BlockInfo[N].Data = Entries[I].Data;
+        Entries[I].Time = 0;
+        N++;
+      }
+    }
+    for (uptr I = 0; I < N; I++)
+      releasePagesToOS(BlockInfo[I].Block, 0, BlockInfo[I].BlockSize,
+                       &BlockInfo[I].Data);
+  }
+
+  struct CachedBlock {
+    uptr Block;
+    uptr BlockEnd;
+    uptr MapBase;
+    uptr MapSize;
+    MapPlatformData Data;
+    u64 Time;
+  };
 
-  void initLinkerInitialized(GlobalStats *S) {
+  HybridMutex Mutex;
+  CachedBlock Entries[MaxEntriesCount];
+  u32 EntriesCount;
+  uptr LargestSize;
+  u32 IsFullEvents;
+  s32 ReleaseToOsIntervalMs;
+};
+
+template <class CacheT> class MapAllocator {
+public:
+  void initLinkerInitialized(GlobalStats *S, s32 ReleaseToOsInterval = -1) {
+    Cache.initLinkerInitialized(ReleaseToOsInterval);
     Stats.initLinkerInitialized();
     if (LIKELY(S))
       S->link(&Stats);
   }
-  void init(GlobalStats *S) {
+  void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) {
     memset(this, 0, sizeof(*this));
-    initLinkerInitialized(S);
+    initLinkerInitialized(S, ReleaseToOsInterval);
   }
 
   void *allocate(uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr,
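Note: the cache keeps the most recently stored block at `Entries[0]` (the previous front entry is demoted into the vacant slot), and `retrieve()` takes the first entry that fits with at most four pages of slack. A compact standalone model of that policy, with illustrative sizes (the real class also tracks mapping info, timestamps, and the full-cache `empty()` heuristic):

    #include <stdint.h>
    #include <stdio.h>

    constexpr uint32_t MaxEntries = 4;
    constexpr uint64_t PageSize = 4096;

    struct Entry {
      uint64_t Block = 0; // 0 means vacant
      uint64_t BlockEnd = 0;
    };
    Entry Entries[MaxEntries];

    bool store(uint64_t Block, uint64_t BlockEnd) {
      for (uint32_t I = 0; I < MaxEntries; I++) {
        if (Entries[I].Block)
          continue;
        if (I != 0)
          Entries[I] = Entries[0]; // demote previous front to the free slot
        Entries[0] = {Block, BlockEnd}; // the new block becomes entry 0
        return true;
      }
      return false; // full; the real cache counts full events, may empty()
    }

    bool retrieve(uint64_t Size, uint64_t *Block) {
      for (uint32_t I = 0; I < MaxEntries; I++) {
        if (!Entries[I].Block)
          continue;
        const uint64_t BlockSize = Entries[I].BlockEnd - Entries[I].Block;
        if (Size > BlockSize)
          continue; // block too small
        if (Size < BlockSize - PageSize * 4)
          continue; // block too large: more than 4 pages would go unused
        *Block = Entries[I].Block;
        Entries[I].Block = 0;
        return true;
      }
      return false;
    }

    int main() {
      store(0x10000, 0x10000 + 8 * PageSize);
      uint64_t B;
      printf("%d\n", retrieve(2 * PageSize, &B)); // 0: too wasteful a fit
      printf("%d\n", retrieve(6 * PageSize, &B)); // 1: fits within the slack
      return 0;
    }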
@@ -79,22 +242,28 @@
 
   void getStats(ScopedString *Str) const;
 
-  void disable() { Mutex.lock(); }
+  void disable() {
+    Mutex.lock();
+    Cache.disable();
+  }
 
-  void enable() { Mutex.unlock(); }
+  void enable() {
+    Cache.enable();
+    Mutex.unlock();
+  }
 
   template <class F> void iterateOverBlocks(F Callback) const {
     for (const auto &H : InUseBlocks)
       Callback(reinterpret_cast<uptr>(&H) + LargeBlock::getHeaderSize());
   }
 
-  static uptr getMaxFreeListSize(void) { return MaxFreeListSize; }
+  static uptr canCache(uptr Size) { return CacheT::canCache(Size); }
 
 private:
+  CacheT Cache;
+
   HybridMutex Mutex;
   DoublyLinkedList<LargeBlock::Header> InUseBlocks;
-  // The free list is sorted based on the committed size of blocks.
-  DoublyLinkedList<LargeBlock::Header> FreeBlocks;
   uptr AllocatedBytes;
   uptr FreedBytes;
   uptr LargestSize;
@@ -114,35 +283,32 @@
 // For allocations requested with an alignment greater than or equal to a page,
 // the committed memory will amount to something close to Size - AlignmentHint
 // (pending rounding and headers).
-template <uptr MaxFreeListSize>
-void *MapAllocator<MaxFreeListSize>::allocate(uptr Size, uptr AlignmentHint,
-                                              uptr *BlockEnd,
-                                              bool ZeroContents) {
+template <class CacheT>
+void *MapAllocator<CacheT>::allocate(uptr Size, uptr AlignmentHint,
+                                     uptr *BlockEnd, bool ZeroContents) {
   DCHECK_GE(Size, AlignmentHint);
   const uptr PageSize = getPageSizeCached();
   const uptr RoundedSize =
       roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize);
 
-  if (MaxFreeListSize && AlignmentHint < PageSize) {
-    ScopedLock L(Mutex);
-    for (auto &H : FreeBlocks) {
-      const uptr FreeBlockSize = H.BlockEnd - reinterpret_cast<uptr>(&H);
-      if (FreeBlockSize < RoundedSize)
-        continue;
-      // Candidate free block should only be at most 4 pages larger.
-      if (FreeBlockSize > RoundedSize + 4 * PageSize)
-        break;
-      FreeBlocks.remove(&H);
-      InUseBlocks.push_back(&H);
-      AllocatedBytes += FreeBlockSize;
-      NumberOfAllocs++;
-      Stats.add(StatAllocated, FreeBlockSize);
+  if (AlignmentHint < PageSize && CacheT::canCache(RoundedSize)) {
+    LargeBlock::Header *H;
+    if (Cache.retrieve(RoundedSize, &H)) {
       if (BlockEnd)
-        *BlockEnd = H.BlockEnd;
-      void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(&H) +
+        *BlockEnd = H->BlockEnd;
+      void *Ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(H) +
                                            LargeBlock::getHeaderSize());
       if (ZeroContents)
-        memset(Ptr, 0, H.BlockEnd - reinterpret_cast<uptr>(Ptr));
+        memset(Ptr, 0, H->BlockEnd - reinterpret_cast<uptr>(Ptr));
+      const uptr BlockSize = H->BlockEnd - reinterpret_cast<uptr>(H);
+      {
+        ScopedLock L(Mutex);
+        InUseBlocks.push_back(H);
+        AllocatedBytes += BlockSize;
+        NumberOfAllocs++;
+        Stats.add(StatAllocated, BlockSize);
+        Stats.add(StatMapped, H->MapSize);
+      }
       return Ptr;
     }
   }
@@ -191,6 +357,8 @@
   H->MapSize = MapEnd - MapBase;
   H->BlockEnd = CommitBase + CommitSize;
   H->Data = Data;
+  if (BlockEnd)
+    *BlockEnd = CommitBase + CommitSize;
   {
     ScopedLock L(Mutex);
     InUseBlocks.push_back(H);
@@ -201,52 +369,31 @@
     Stats.add(StatAllocated, CommitSize);
     Stats.add(StatMapped, H->MapSize);
   }
-  if (BlockEnd)
-    *BlockEnd = CommitBase + CommitSize;
   return reinterpret_cast<void *>(Ptr + LargeBlock::getHeaderSize());
 }
 
-template <uptr MaxFreeListSize>
-void MapAllocator<MaxFreeListSize>::deallocate(void *Ptr) {
+template <class CacheT> void MapAllocator<CacheT>::deallocate(void *Ptr) {
   LargeBlock::Header *H = LargeBlock::getHeader(Ptr);
   const uptr Block = reinterpret_cast<uptr>(H);
+  const uptr CommitSize = H->BlockEnd - Block;
   {
     ScopedLock L(Mutex);
     InUseBlocks.remove(H);
-    const uptr CommitSize = H->BlockEnd - Block;
     FreedBytes += CommitSize;
     NumberOfFrees++;
     Stats.sub(StatAllocated, CommitSize);
-    if (MaxFreeListSize && FreeBlocks.size() < MaxFreeListSize) {
-      bool Inserted = false;
-      for (auto &F : FreeBlocks) {
-        const uptr FreeBlockSize = F.BlockEnd - reinterpret_cast<uptr>(&F);
-        if (FreeBlockSize >= CommitSize) {
-          FreeBlocks.insert(H, &F);
-          Inserted = true;
-          break;
-        }
-      }
-      if (!Inserted)
-        FreeBlocks.push_back(H);
-      const uptr RoundedAllocationStart =
-          roundUpTo(Block + LargeBlock::getHeaderSize(), getPageSizeCached());
-      MapPlatformData Data = H->Data;
-      // TODO(kostyak): use release_to_os_interval_ms
-      releasePagesToOS(Block, RoundedAllocationStart - Block,
-                       H->BlockEnd - RoundedAllocationStart, &Data);
-      return;
-    }
     Stats.sub(StatMapped, H->MapSize);
   }
+  if (CacheT::canCache(CommitSize) && Cache.store(H))
+    return;
   void *Addr = reinterpret_cast<void *>(H->MapBase);
   const uptr Size = H->MapSize;
   MapPlatformData Data = H->Data;
   unmap(Addr, Size, UNMAP_ALL, &Data);
 }
 
-template <uptr MaxFreeListSize>
-void MapAllocator<MaxFreeListSize>::getStats(ScopedString *Str) const {
+template <class CacheT>
+void MapAllocator<CacheT>::getStats(ScopedString *Str) const {
   Str->append(
       "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times "
       "(%zuK), remains %zu (%zuK) max %zuM\n",
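Note: callers can now hand the Secondary the same release interval as the Primary (see the combined.h hunk above). A sketch of driving the revamped Secondary directly, mirroring the unit tests further down (assumes the in-tree scudo headers; not part of this patch):

    #include "secondary.h"

    void exerciseSecondary() {
      scudo::MapAllocator<scudo::MapAllocatorCache<>> Allocator;
      // A negative interval disables timed release; with 1000, cached
      // entries older than a second become eligible for release on store().
      Allocator.init(/*S=*/nullptr, /*ReleaseToOsInterval=*/1000);
      void *P = Allocator.allocate(1U << 16);
      // For sizes where canCache() holds, deallocate() stores the block in
      // the cache instead of unmapping it, so a later allocate() of a
      // similar size can be served without a new mmap.
      Allocator.deallocate(P);
      void *Q = Allocator.allocate(1U << 16); // likely served from the cache
      Allocator.deallocate(Q);
    }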
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -347,7 +347,7 @@
   // Tiny allocator, its Primary only serves chunks of 1024 bytes.
   using DeathSizeClassMap = scudo::SizeClassMap<1U, 10U, 10U, 10U, 1U, 10U>;
   typedef scudo::SizeClassAllocator64<DeathSizeClassMap, 20U> Primary;
-  typedef scudo::MapAllocator<0U> Secondary;
+  typedef scudo::MapAllocator<scudo::MapAllocatorNoCache> Secondary;
   template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U>;
 };
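Note: `DeathConfig` deliberately picks `MapAllocatorNoCache` so that a freed Secondary block is unmapped immediately, which keeps use-after-free death tests deterministic. A sketch of that pattern (hypothetical test name, mirroring testSecondaryBasic below):

    TEST(ScudoSecondaryTest, NoCacheUnmapsOnFree) {
      using AllocatorT = scudo::MapAllocator<scudo::MapAllocatorNoCache>;
      AllocatorT Allocator;
      Allocator.init(/*S=*/nullptr);
      void *P = Allocator.allocate(1U << 20);
      Allocator.deallocate(P); // canCache() is always false: unmapped now
      EXPECT_DEATH(memset(P, 'A', 1U), ""); // the mapping is gone
    }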
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -29,8 +29,8 @@
   memset(P, 'A', Size);
   EXPECT_GE(SecondaryT::getBlockSize(P), Size);
   L->deallocate(P);
-  // If we are not using a free list, blocks are unmapped on deallocation.
-  if (SecondaryT::getMaxFreeListSize() == 0U)
+  // If the Secondary can't cache that pointer, it will be unmapped.
+  if (!SecondaryT::canCache(Size))
     EXPECT_DEATH(memset(P, 'A', Size), "");
 
   const scudo::uptr Align = 1U << 16;
@@ -55,17 +55,18 @@
 }
 
 TEST(ScudoSecondaryTest, SecondaryBasic) {
-  testSecondaryBasic<scudo::MapAllocator<0U>>();
+  testSecondaryBasic<scudo::MapAllocator<scudo::MapAllocatorNoCache>>();
 #if !SCUDO_FUCHSIA
-  testSecondaryBasic<scudo::MapAllocator<>>();
-  testSecondaryBasic<scudo::MapAllocator<64U>>();
+  testSecondaryBasic<scudo::MapAllocator<scudo::MapAllocatorCache<>>>();
+  testSecondaryBasic<
+      scudo::MapAllocator<scudo::MapAllocatorCache<64U, 1UL << 20>>>();
 #endif
 }
 
 #if SCUDO_FUCHSIA
-using LargeAllocator = scudo::MapAllocator<0U>;
+using LargeAllocator = scudo::MapAllocator<scudo::MapAllocatorNoCache>;
 #else
-using LargeAllocator = scudo::MapAllocator<>;
+using LargeAllocator = scudo::MapAllocator<scudo::MapAllocatorCache<>>;
 #endif
 
 // This exercises a variety of combinations of size and alignment for the