Index: compiler-rt/trunk/lib/scudo/standalone/CMakeLists.txt
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/CMakeLists.txt
+++ compiler-rt/trunk/lib/scudo/standalone/CMakeLists.txt
@@ -69,6 +69,7 @@
   list.h
   mutex.h
   platform.h
+  quarantine.h
   release.h
   report.h
   secondary.h
Index: compiler-rt/trunk/lib/scudo/standalone/mutex.h
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/mutex.h
+++ compiler-rt/trunk/lib/scudo/standalone/mutex.h
@@ -16,6 +16,7 @@
 
 class StaticSpinMutex {
 public:
+  void initLinkerInitialized() {}
   void init() { atomic_store_relaxed(&State, 0); }
 
   void lock() {
Index: compiler-rt/trunk/lib/scudo/standalone/quarantine.h
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/quarantine.h
+++ compiler-rt/trunk/lib/scudo/standalone/quarantine.h
@@ -0,0 +1,291 @@
+//===-- quarantine.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_QUARANTINE_H_
+#define SCUDO_QUARANTINE_H_
+
+#include "list.h"
+#include "mutex.h"
+#include "string_utils.h"
+
+namespace scudo {
+
+struct QuarantineBatch {
+  // With the following count, a batch (and the header that protects it)
+  // occupies 4096 bytes on 32-bit platforms, and 8192 bytes on 64-bit.
+  static const u32 MaxCount = 1019;
+  QuarantineBatch *Next;
+  uptr Size;
+  u32 Count;
+  void *Batch[MaxCount];
+
+  void init(void *Ptr, uptr Size) {
+    Count = 1;
+    Batch[0] = Ptr;
+    this->Size = Size + sizeof(QuarantineBatch); // Account for the batch size.
+  }
+
+  // The total size of quarantined nodes recorded in this batch.
+  uptr getQuarantinedSize() const { return Size - sizeof(QuarantineBatch); }
+
+  void push_back(void *Ptr, uptr Size) {
+    DCHECK_LT(Count, MaxCount);
+    Batch[Count++] = Ptr;
+    this->Size += Size;
+  }
+
+  bool canMerge(const QuarantineBatch *const From) const {
+    return Count + From->Count <= MaxCount;
+  }
+
+  void merge(QuarantineBatch *const From) {
+    DCHECK_LE(Count + From->Count, MaxCount);
+    DCHECK_GE(Size, sizeof(QuarantineBatch));
+
+    for (uptr I = 0; I < From->Count; ++I)
+      Batch[Count + I] = From->Batch[I];
+    Count += From->Count;
+    Size += From->getQuarantinedSize();
+
+    From->Count = 0;
+    From->Size = sizeof(QuarantineBatch);
+  }
+
+  void shuffle(u32 State) { ::scudo::shuffle(Batch, Count, &State); }
+};
+
+COMPILER_CHECK(sizeof(QuarantineBatch) <= (1U << 13)); // 8Kb.
+
+// Per-thread cache of memory blocks.
+template <typename Callback> class QuarantineCache {
+public:
+  void initLinkerInitialized() {}
+  void init() {
+    memset(this, 0, sizeof(*this));
+    initLinkerInitialized();
+  }
+
+  // Total memory used, including internal accounting.
+  uptr getSize() const { return atomic_load_relaxed(&Size); }
+  // Memory used for internal accounting.
+  uptr getOverheadSize() const { return List.size() * sizeof(QuarantineBatch); }
+
+  void enqueue(Callback Cb, void *Ptr, uptr Size) {
+    if (List.empty() || List.back()->Count == QuarantineBatch::MaxCount) {
+      QuarantineBatch *B =
+          reinterpret_cast<QuarantineBatch *>(Cb.allocate(sizeof(*B)));
+      DCHECK(B);
+      B->init(Ptr, Size);
+      enqueueBatch(B);
+    } else {
+      List.back()->push_back(Ptr, Size);
+      addToSize(Size);
+    }
+  }
+
+  void transfer(QuarantineCache *From) {
+    List.append_back(&From->List);
+    addToSize(From->getSize());
+    atomic_store_relaxed(&From->Size, 0);
+  }
+
+  void enqueueBatch(QuarantineBatch *B) {
+    List.push_back(B);
+    addToSize(B->Size);
+  }
+
+  QuarantineBatch *dequeueBatch() {
+    if (List.empty())
+      return nullptr;
+    QuarantineBatch *B = List.front();
+    List.pop_front();
+    subFromSize(B->Size);
+    return B;
+  }
+
+  void mergeBatches(QuarantineCache *ToDeallocate) {
+    uptr ExtractedSize = 0;
+    QuarantineBatch *Current = List.front();
+    while (Current && Current->Next) {
+      if (Current->canMerge(Current->Next)) {
+        QuarantineBatch *Extracted = Current->Next;
+        // Move all the chunks into the current batch.
+        Current->merge(Extracted);
+        DCHECK_EQ(Extracted->Count, 0);
+        DCHECK_EQ(Extracted->Size, sizeof(QuarantineBatch));
+        // Remove the next batch from the list and account for its size.
+        List.extract(Current, Extracted);
+        ExtractedSize += Extracted->Size;
+        // Add it to the deallocation list.
+        ToDeallocate->enqueueBatch(Extracted);
+      } else {
+        Current = Current->Next;
+      }
+    }
+    subFromSize(ExtractedSize);
+  }
+
+  void printStats() const {
+    uptr BatchCount = 0;
+    uptr TotalOverheadBytes = 0;
+    uptr TotalBytes = 0;
+    uptr TotalQuarantineChunks = 0;
+    for (const QuarantineBatch &Batch : List) {
+      BatchCount++;
+      TotalBytes += Batch.Size;
+      TotalOverheadBytes += Batch.Size - Batch.getQuarantinedSize();
+      TotalQuarantineChunks += Batch.Count;
+    }
+    const uptr QuarantineChunksCapacity =
+        BatchCount * QuarantineBatch::MaxCount;
+    const uptr ChunksUsagePercent =
+        (QuarantineChunksCapacity == 0)
+            ? 0
+            : TotalQuarantineChunks * 100 / QuarantineChunksCapacity;
+    const uptr TotalQuarantinedBytes = TotalBytes - TotalOverheadBytes;
+    const uptr MemoryOverheadPercent =
+        (TotalQuarantinedBytes == 0)
+            ? 0
+            : TotalOverheadBytes * 100 / TotalQuarantinedBytes;
+    Printf("Global quarantine stats: batches: %zd; bytes: %zd (user: %zd); "
+           "chunks: %zd (capacity: %zd); %zd%% chunks used; %zd%% memory "
+           "overhead\n",
+           BatchCount, TotalBytes, TotalQuarantinedBytes,
+           TotalQuarantineChunks, QuarantineChunksCapacity, ChunksUsagePercent,
+           MemoryOverheadPercent);
+  }
+
+private:
+  IntrusiveList<QuarantineBatch> List;
+  atomic_uptr Size;
+
+  void addToSize(uptr add) { atomic_store_relaxed(&Size, getSize() + add); }
+  void subFromSize(uptr sub) { atomic_store_relaxed(&Size, getSize() - sub); }
+};
+
+// The callback interface is:
+// void Callback::recycle(Node *Ptr);
+// void *Callback::allocate(uptr Size);
+// void Callback::deallocate(void *Ptr);
+template <typename Callback, typename Node> class GlobalQuarantine {
+public:
+  typedef QuarantineCache<Callback> CacheT;
+
+  void initLinkerInitialized(uptr Size, uptr CacheSize) {
+    // Thread local quarantine size can be zero only when global quarantine
+    // size is zero (it allows us to perform just one atomic read per put()
+    // call).
+    CHECK((Size == 0 && CacheSize == 0) || CacheSize != 0);
+
+    atomic_store_relaxed(&MaxSize, Size);
+    atomic_store_relaxed(&MinSize, Size / 10 * 9); // 90% of max size.
+    atomic_store_relaxed(&MaxCacheSize, CacheSize);
+
+    Cache.initLinkerInitialized();
+    CacheMutex.initLinkerInitialized();
+    RecycleMutex.initLinkerInitialized();
+  }
+  void init(uptr Size, uptr CacheSize) {
+    memset(this, 0, sizeof(*this));
+    initLinkerInitialized(Size, CacheSize);
+  }
+
+  uptr getMaxSize() const { return atomic_load_relaxed(&MaxSize); }
+  uptr getCacheSize() const { return atomic_load_relaxed(&MaxCacheSize); }
+
+  void put(CacheT *C, Callback Cb, Node *Ptr, uptr Size) {
+    C->enqueue(Cb, Ptr, Size);
+    if (C->getSize() > getCacheSize())
+      drain(C, Cb);
+  }
+
+  void NOINLINE drain(CacheT *C, Callback Cb) {
+    {
+      SpinMutexLock L(&CacheMutex);
+      Cache.transfer(C);
+    }
+    if (Cache.getSize() > getMaxSize() && RecycleMutex.tryLock())
+      recycle(atomic_load_relaxed(&MinSize), Cb);
+  }
+
+  void NOINLINE drainAndRecycle(CacheT *C, Callback Cb) {
+    {
+      SpinMutexLock L(&CacheMutex);
+      Cache.transfer(C);
+    }
+    RecycleMutex.lock();
+    recycle(0, Cb);
+  }
+
+  void printStats() const {
+    // It assumes that the world is stopped, just as the allocator's
+    // printStats.
+    Printf("Quarantine limits: global: %zdM; thread local: %zdK\n",
+           getMaxSize() >> 20, getCacheSize() >> 10);
+    Cache.printStats();
+  }
+
+private:
+  // Read-only data.
+  alignas(SCUDO_CACHE_LINE_SIZE) StaticSpinMutex CacheMutex;
+  CacheT Cache;
+  alignas(SCUDO_CACHE_LINE_SIZE) StaticSpinMutex RecycleMutex;
+  atomic_uptr MinSize;
+  atomic_uptr MaxSize;
+  alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize;
+
+  void NOINLINE recycle(uptr MinSize, Callback Cb) {
+    CacheT Tmp;
+    Tmp.init();
+    {
+      SpinMutexLock L(&CacheMutex);
+      // Go over the batches and merge partially filled ones to save some
+      // memory; otherwise the batches themselves (whose memory is counted
+      // against the quarantine limit) can outweigh the user's actual
+      // quarantined chunks, which defeats the purpose of the quarantine.
+      const uptr CacheSize = Cache.getSize();
+      const uptr OverheadSize = Cache.getOverheadSize();
+      DCHECK_GE(CacheSize, OverheadSize);
+      // Do the merge only when overhead exceeds this predefined limit (might
+      // require some tuning). This saves us a merge attempt when the batch
+      // list is unlikely to contain batches suitable for merging.
+      constexpr uptr OverheadThresholdPercents = 100;
+      if (CacheSize > OverheadSize &&
+          OverheadSize * (100 + OverheadThresholdPercents) >
+              CacheSize * OverheadThresholdPercents) {
+        Cache.mergeBatches(&Tmp);
+      }
+      // Extract enough chunks from the quarantine to get below the max
+      // quarantine size and leave some leeway for the newly quarantined
+      // chunks.
+      while (Cache.getSize() > MinSize)
+        Tmp.enqueueBatch(Cache.dequeueBatch());
+    }
+    RecycleMutex.unlock();
+    doRecycle(&Tmp, Cb);
+  }
+
+  void NOINLINE doRecycle(CacheT *C, Callback Cb) {
+    while (QuarantineBatch *B = C->dequeueBatch()) {
+      const u32 Seed = static_cast<u32>(
+          (reinterpret_cast<uptr>(B) ^ reinterpret_cast<uptr>(C)) >> 4);
+      B->shuffle(Seed);
+      constexpr uptr NumberOfPrefetch = 8UL;
+      CHECK(NumberOfPrefetch <= ARRAY_SIZE(B->Batch));
+      for (uptr I = 0; I < NumberOfPrefetch; I++)
+        PREFETCH(B->Batch[I]);
+      for (uptr I = 0, Count = B->Count; I < Count; I++) {
+        if (I + NumberOfPrefetch < Count)
+          PREFETCH(B->Batch[I + NumberOfPrefetch]);
+        Cb.recycle(reinterpret_cast<Node *>(B->Batch[I]));
+      }
+      Cb.deallocate(B);
+    }
+  }
+};
+
+} // namespace scudo
+
+#endif // SCUDO_QUARANTINE_H_
Index: compiler-rt/trunk/lib/scudo/standalone/tests/CMakeLists.txt
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/tests/CMakeLists.txt
+++ compiler-rt/trunk/lib/scudo/standalone/tests/CMakeLists.txt
@@ -56,6 +56,7 @@
   list_test.cc
   map_test.cc
   mutex_test.cc
+  quarantine_test.cc
   release_test.cc
   report_test.cc
   secondary_test.cc
Index: compiler-rt/trunk/lib/scudo/standalone/tests/quarantine_test.cc
===================================================================
--- compiler-rt/trunk/lib/scudo/standalone/tests/quarantine_test.cc
+++ compiler-rt/trunk/lib/scudo/standalone/tests/quarantine_test.cc
@@ -0,0 +1,240 @@
+//===-- quarantine_test.cc --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "quarantine.h"
+
+#include "gtest/gtest.h"
+
+#include <pthread.h>
+
+static void *FakePtr = reinterpret_cast<void *>(0xFA83FA83);
+static const scudo::uptr BlockSize = 8UL;
+static const scudo::uptr LargeBlockSize = 16384UL;
+
+struct QuarantineCallback {
+  void recycle(void *P) { EXPECT_EQ(P, FakePtr); }
+  void *allocate(scudo::uptr Size) { return malloc(Size); }
+  void deallocate(void *P) { free(P); }
+};
+
+typedef scudo::GlobalQuarantine<QuarantineCallback, void> QuarantineT;
+typedef typename QuarantineT::CacheT CacheT;
+
+static QuarantineCallback Cb;
+
+static void deallocateCache(CacheT *Cache) {
+  while (scudo::QuarantineBatch *Batch = Cache->dequeueBatch())
+    Cb.deallocate(Batch);
+}
+
+TEST(ScudoQuarantineTest, QuarantineBatchMerge) {
+  // Verify the trivial case.
+  scudo::QuarantineBatch Into;
+  Into.init(FakePtr, 4UL);
+  scudo::QuarantineBatch From;
+  From.init(FakePtr, 8UL);
+
+  Into.merge(&From);
+
+  EXPECT_EQ(Into.Count, 2UL);
+  EXPECT_EQ(Into.Batch[0], FakePtr);
+  EXPECT_EQ(Into.Batch[1], FakePtr);
+  EXPECT_EQ(Into.Size, 12UL + sizeof(scudo::QuarantineBatch));
+  EXPECT_EQ(Into.getQuarantinedSize(), 12UL);
+
+  EXPECT_EQ(From.Count, 0UL);
+  EXPECT_EQ(From.Size, sizeof(scudo::QuarantineBatch));
+  EXPECT_EQ(From.getQuarantinedSize(), 0UL);
+
+  // Merge the batch to the limit.
+  for (scudo::uptr I = 2; I < scudo::QuarantineBatch::MaxCount; ++I)
+    From.push_back(FakePtr, 8UL);
+  EXPECT_TRUE(Into.Count + From.Count == scudo::QuarantineBatch::MaxCount);
+  EXPECT_TRUE(Into.canMerge(&From));
+
+  Into.merge(&From);
+  EXPECT_TRUE(Into.Count == scudo::QuarantineBatch::MaxCount);
+
+  // No more space, not even for one element.
+  From.init(FakePtr, 8UL);
+
+  EXPECT_FALSE(Into.canMerge(&From));
+}
+
+TEST(ScudoQuarantineTest, QuarantineCacheMergeBatchesEmpty) {
+  CacheT Cache;
+  CacheT ToDeallocate;
+  Cache.init();
+  ToDeallocate.init();
+  Cache.mergeBatches(&ToDeallocate);
+
+  EXPECT_EQ(ToDeallocate.getSize(), 0UL);
+  EXPECT_EQ(ToDeallocate.dequeueBatch(), nullptr);
+}
+
+TEST(ScudoQuarantineTest, QuarantineCacheMergeBatchesOneBatch) {
+  CacheT Cache;
+  Cache.init();
+  Cache.enqueue(Cb, FakePtr, BlockSize);
+  EXPECT_EQ(BlockSize + sizeof(scudo::QuarantineBatch), Cache.getSize());
+
+  CacheT ToDeallocate;
+  ToDeallocate.init();
+  Cache.mergeBatches(&ToDeallocate);
+
+  // Nothing to merge, nothing to deallocate.
+  EXPECT_EQ(BlockSize + sizeof(scudo::QuarantineBatch), Cache.getSize());
+
+  EXPECT_EQ(ToDeallocate.getSize(), 0UL);
+  EXPECT_EQ(ToDeallocate.dequeueBatch(), nullptr);
+
+  deallocateCache(&Cache);
+}
+
+TEST(ScudoQuarantineTest, QuarantineCacheMergeBatchesSmallBatches) {
+  // Make a Cache with two batches small enough to merge.
+  CacheT From;
+  From.init();
+  From.enqueue(Cb, FakePtr, BlockSize);
+  CacheT Cache;
+  Cache.init();
+  Cache.enqueue(Cb, FakePtr, BlockSize);
+
+  Cache.transfer(&From);
+  EXPECT_EQ(BlockSize * 2 + sizeof(scudo::QuarantineBatch) * 2,
+            Cache.getSize());
+
+  CacheT ToDeallocate;
+  ToDeallocate.init();
+  Cache.mergeBatches(&ToDeallocate);
+
+  // Batches merged, one batch to deallocate.
+  EXPECT_EQ(BlockSize * 2 + sizeof(scudo::QuarantineBatch), Cache.getSize());
+  EXPECT_EQ(ToDeallocate.getSize(), sizeof(scudo::QuarantineBatch));
+
+  deallocateCache(&Cache);
+  deallocateCache(&ToDeallocate);
+}
+
+TEST(ScudoQuarantineTest, QuarantineCacheMergeBatchesTooBigToMerge) {
+  const scudo::uptr NumBlocks = scudo::QuarantineBatch::MaxCount - 1;
+
+  // Make a Cache with two batches too big to merge.
+  CacheT From;
+  CacheT Cache;
+  From.init();
+  Cache.init();
+  for (scudo::uptr I = 0; I < NumBlocks; ++I) {
+    From.enqueue(Cb, FakePtr, BlockSize);
+    Cache.enqueue(Cb, FakePtr, BlockSize);
+  }
+  Cache.transfer(&From);
+  EXPECT_EQ(BlockSize * NumBlocks * 2 + sizeof(scudo::QuarantineBatch) * 2,
+            Cache.getSize());
+
+  CacheT ToDeallocate;
+  ToDeallocate.init();
+  Cache.mergeBatches(&ToDeallocate);
+
+  // Batches cannot be merged.
+  EXPECT_EQ(BlockSize * NumBlocks * 2 + sizeof(scudo::QuarantineBatch) * 2,
+            Cache.getSize());
+  EXPECT_EQ(ToDeallocate.getSize(), 0UL);
+
+  deallocateCache(&Cache);
+}
+
+TEST(ScudoQuarantineTest, QuarantineCacheMergeBatchesALotOfBatches) {
+  const scudo::uptr NumBatchesAfterMerge = 3;
+  const scudo::uptr NumBlocks =
+      scudo::QuarantineBatch::MaxCount * NumBatchesAfterMerge;
+  const scudo::uptr NumBatchesBeforeMerge = NumBlocks;
+
+  // Make a Cache with many small batches.
+  CacheT Cache;
+  Cache.init();
+  for (scudo::uptr I = 0; I < NumBlocks; ++I) {
+    CacheT From;
+    From.init();
+    From.enqueue(Cb, FakePtr, BlockSize);
+    Cache.transfer(&From);
+  }
+
+  EXPECT_EQ(BlockSize * NumBlocks +
+                sizeof(scudo::QuarantineBatch) * NumBatchesBeforeMerge,
+            Cache.getSize());
+
+  CacheT ToDeallocate;
+  ToDeallocate.init();
+  Cache.mergeBatches(&ToDeallocate);
+
+  // All blocks should fit into 3 batches.
+  EXPECT_EQ(BlockSize * NumBlocks +
+                sizeof(scudo::QuarantineBatch) * NumBatchesAfterMerge,
+            Cache.getSize());
+
+  EXPECT_EQ(ToDeallocate.getSize(),
+            sizeof(scudo::QuarantineBatch) *
+                (NumBatchesBeforeMerge - NumBatchesAfterMerge));
+
+  deallocateCache(&Cache);
+  deallocateCache(&ToDeallocate);
+}
+
+static const scudo::uptr MaxQuarantineSize = 1024UL << 10; // 1MB
+static const scudo::uptr MaxCacheSize = 256UL << 10;       // 256KB
+
+TEST(ScudoQuarantineTest, GlobalQuarantine) {
+  QuarantineT Quarantine;
+  CacheT Cache;
+  Cache.init();
+  Quarantine.init(MaxQuarantineSize, MaxCacheSize);
+  EXPECT_EQ(Quarantine.getMaxSize(), MaxQuarantineSize);
+  EXPECT_EQ(Quarantine.getCacheSize(), MaxCacheSize);
+
+  bool DrainOccurred = false;
+  scudo::uptr CacheSize = Cache.getSize();
+  EXPECT_EQ(Cache.getSize(), 0UL);
+  // We quarantine enough blocks that a drain has to occur. Verify this by
+  // looking for a decrease of the size of the cache.
+  for (scudo::uptr I = 0; I < 128UL; I++) {
+    Quarantine.put(&Cache, Cb, FakePtr, LargeBlockSize);
+    if (!DrainOccurred && Cache.getSize() < CacheSize)
+      DrainOccurred = true;
+    CacheSize = Cache.getSize();
+  }
+  EXPECT_TRUE(DrainOccurred);
+
+  Quarantine.drainAndRecycle(&Cache, Cb);
+  EXPECT_EQ(Cache.getSize(), 0UL);
+
+  Quarantine.printStats();
+}
+
+void *populateQuarantine(void *Param) {
+  CacheT Cache;
+  Cache.init();
+  QuarantineT *Quarantine = reinterpret_cast<QuarantineT *>(Param);
+  for (scudo::uptr I = 0; I < 128UL; I++)
+    Quarantine->put(&Cache, Cb, FakePtr, LargeBlockSize);
+  return 0;
+}
+
+TEST(ScudoQuarantineTest, ThreadedGlobalQuarantine) {
+  QuarantineT Quarantine;
+  Quarantine.init(MaxQuarantineSize, MaxCacheSize);
+
+  const scudo::uptr NumberOfThreads = 32U;
+  pthread_t T[NumberOfThreads];
+  for (scudo::uptr I = 0; I < NumberOfThreads; I++)
+    pthread_create(&T[I], 0, populateQuarantine, &Quarantine);
+  for (scudo::uptr I = 0; I < NumberOfThreads; I++)
+    pthread_join(T[I], 0);
+
+  Quarantine.printStats();
+}
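
The callback interface documented in quarantine.h (recycle, allocate, deallocate) is what ties the quarantine to an actual allocator; the unit tests above only exercise it with a trivial QuarantineCallback. The sketch below shows one way an allocator-side integration could look. It is illustrative only: DeferredFreeCallback, initQuarantine, and quarantinedFree are hypothetical names that are not part of this patch, the limits are arbitrary, and the scudo headers added above are assumed to be on the include path.

// Illustrative sketch, not part of the patch: a malloc/free-backed
// integration of GlobalQuarantine that defers frees instead of releasing
// memory immediately.
#include "quarantine.h"

#include <stdlib.h>

struct DeferredFreeCallback {
  // Called for each chunk evicted from the quarantine; this is where the
  // deferred deallocation actually happens.
  void recycle(void *Ptr) { free(Ptr); }
  // Used by the quarantine to allocate and release QuarantineBatch storage.
  void *allocate(scudo::uptr Size) { return malloc(Size); }
  void deallocate(void *Ptr) { free(Ptr); }
};

typedef scudo::GlobalQuarantine<DeferredFreeCallback, void> QuarantineT;

static QuarantineT Quarantine;
// One cache per thread; only drains touch the global state. In a real
// allocator every thread would have to init() (and eventually drain) its
// cache, which this sketch glosses over.
static thread_local QuarantineT::CacheT ThreadCache;

void initQuarantine() {
  // Same limits as the unit tests above: 1MB global, 256KB per thread.
  Quarantine.init(/*Size=*/1024UL << 10, /*CacheSize=*/256UL << 10);
  ThreadCache.init();
}

void quarantinedFree(void *Ptr, scudo::uptr Size) {
  // put() enqueues into the per-thread cache, transfers it to the global
  // quarantine once the cache exceeds CacheSize, and recycles down to 90%
  // of MaxSize once the global quarantine overflows.
  Quarantine.put(&ThreadCache, DeferredFreeCallback(), Ptr, Size);
}

The per-thread cache keeps the common path lock-free: CacheMutex is only taken on a drain, and the RecycleMutex.tryLock() in drain() lets at most one thread pay for recycling while the others continue unblocked.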
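
On the MaxCount = 1019 comment at the top of quarantine.h, the arithmetic works out as follows: on 64-bit, Next (8 bytes) + Size (8) + Count (4, padded to 8) + 1019 pointers of 8 bytes gives 8176 bytes, which together with the allocator's chunk header fits an 8192-byte allocation; on 32-bit, 4 + 4 + 4 + 1019 * 4 = 4088 bytes, fitting 4096. The check below restates that math; it assumes the usual ILP32/LP64 struct layouts and is a sketch, not part of the patch.

// Sketch: back-of-the-envelope check of the MaxCount = 1019 size comment.
// Assumes the usual ILP32/LP64 struct layout; not part of the patch.
#include "quarantine.h"

// 64-bit: 8 (Next) + 8 (Size) + 4 (Count) + 4 (padding) + 1019 * 8 = 8176.
// 32-bit: 4 (Next) + 4 (Size) + 4 (Count) + 1019 * 4 = 4088.
static_assert(sizeof(scudo::QuarantineBatch) ==
                  (sizeof(void *) == 8 ? 8176 : 4088),
              "QuarantineBatch layout no longer matches the size comment");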
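
On the merge trigger in recycle(): with OverheadThresholdPercents = 100, the condition OverheadSize * (100 + 100) > CacheSize * 100 simplifies to 2 * OverheadSize > CacheSize, that is, batch bookkeeping exceeds the user bytes it tracks. In general, OverheadSize * (100 + T) > CacheSize * T rearranges to 100 * OverheadSize > T * (CacheSize - OverheadSize), so the merge only runs when the overhead is more than T% of the actually quarantined bytes. The helper below (hypothetical, not in the patch) restates the trigger in that form.

// Sketch: the recycle() merge trigger rewritten against user bytes. This
// helper is hypothetical and not part of the patch; it is algebraically
// equivalent to the patch's condition, overflow considerations aside.
#include "quarantine.h"

constexpr scudo::uptr OverheadThresholdPercents = 100;

bool shouldMergeBatches(scudo::uptr CacheSize, scudo::uptr OverheadSize) {
  // CacheSize counts both user bytes and QuarantineBatch bookkeeping.
  const scudo::uptr UserBytes = CacheSize - OverheadSize;
  // Equivalent to: OverheadSize * (100 + T) > CacheSize * T.
  return CacheSize > OverheadSize &&
         OverheadSize * 100 > UserBytes * OverheadThresholdPercents;
}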