diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -49,6 +49,103 @@
   MapPlatformData *Data = nullptr;
 };
 
+// A buffer pool which holds a fixed number of static buffers for fast buffer
+// allocation. If the request size is greater than `StaticBufferSize`, it'll
+// delegate the allocation to map().
+template <uptr StaticBufferCount, uptr StaticBufferSize> class BufferPool {
+public:
+  // Preserve 1 bit in the `Mask` so that we don't need to do a zero-check
+  // while extracting the least significant bit from the `Mask`.
+  static_assert(StaticBufferCount < SCUDO_WORDSIZE, "");
+  static_assert(isAligned(StaticBufferSize, SCUDO_CACHE_LINE_SIZE), "");
+
+  // Return a buffer which is at least `BufferSize` bytes.
+  uptr *getBuffer(const uptr BufferSize) {
+    if (UNLIKELY(BufferSize > StaticBufferSize))
+      return getDynamicBuffer(BufferSize);
+
+    uptr index;
+    {
+      // TODO: In general, we expect this operation to be fast, so a waiting
+      // thread shouldn't be put to sleep. HybridMutex does implement
+      // busy-waiting, but we may want to review the performance and see if we
+      // need an explicit spin lock here.
+      ScopedLock L(Mutex);
+      index = getLeastSignificantSetBitIndex(Mask);
+      if (index < StaticBufferCount)
+        Mask ^= static_cast<uptr>(1) << index;
+    }
+
+    if (index >= StaticBufferCount)
+      return getDynamicBuffer(BufferSize);
+
+    const uptr Offset = index * StaticBufferSize;
+    memset(&RawBuffer[Offset], 0, StaticBufferSize);
+    return &RawBuffer[Offset];
+  }
+
+  void releaseBuffer(uptr *Buffer, const uptr BufferSize) {
+    const uptr index = getStaticBufferIndex(Buffer, BufferSize);
+    if (index < StaticBufferCount) {
+      ScopedLock L(Mutex);
+      DCHECK_EQ((Mask & (static_cast<uptr>(1) << index)), 0U);
+      Mask |= static_cast<uptr>(1) << index;
+    } else {
+      unmap(reinterpret_cast<void *>(Buffer),
+            roundUp(BufferSize, getPageSizeCached()));
+    }
+  }
+
+  bool isStaticBufferTestOnly(uptr *Buffer, uptr BufferSize) {
+    return getStaticBufferIndex(Buffer, BufferSize) < StaticBufferCount;
+  }
+
+private:
+  uptr getStaticBufferIndex(uptr *Buffer, uptr BufferSize) {
+    if (UNLIKELY(BufferSize > StaticBufferSize))
+      return StaticBufferCount;
+
+    const uptr BufferBase = reinterpret_cast<uptr>(Buffer);
+    const uptr RawBufferBase = reinterpret_cast<uptr>(RawBuffer);
+
+    if (BufferBase < RawBufferBase ||
+        BufferBase >= RawBufferBase + sizeof(RawBuffer)) {
+      return StaticBufferCount;
+    }
+
+    DCHECK_LE(BufferSize, StaticBufferSize);
+    DCHECK_LE(BufferBase + BufferSize, RawBufferBase + sizeof(RawBuffer));
+    DCHECK_EQ((BufferBase - RawBufferBase) % StaticBufferSize, 0U);
+
+    const uptr index =
+        (BufferBase - RawBufferBase) / (StaticBufferSize * sizeof(uptr));
+    DCHECK_LT(index, StaticBufferCount);
+    return index;
+  }
+
+  uptr *getDynamicBuffer(const uptr BufferSize) {
+    // When using a heap-based buffer, precommit the pages backing the
+    // Vmar by passing the |MAP_PRECOMMIT| flag. This allows an optimization
+    // where page fault exceptions are skipped as the allocated memory
+    // is accessed. So far, this is only enabled on Fuchsia; it hasn't proven
+    // a performance benefit on other platforms.
+    const uptr MmapFlags = MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0);
+    return reinterpret_cast<uptr *>(
+        map(nullptr, roundUp(BufferSize, getPageSizeCached()),
+            "scudo:counters", MmapFlags, &MapData));
+  }
+
+  HybridMutex Mutex;
+  // A '1' bit means the buffer at that index is free; '0' means it's in use.
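+  // E.g., with StaticBufferCount == 2 on 64-bit, Mask starts as ~0; handing
+  // out buffer 0 clears bit 0. Once bits 0 and 1 are both clear, the least
+  // significant set bit is a reserved high bit, so its index is >=
+  // StaticBufferCount and getBuffer() falls back to getDynamicBuffer().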
+  uptr Mask GUARDED_BY(Mutex) = ~static_cast<uptr>(0);
+  uptr RawBuffer[StaticBufferCount * StaticBufferSize] GUARDED_BY(Mutex);
+  [[no_unique_address]] MapPlatformData MapData = {};
+};
+
 // A Region page map is used to record the usage of pages in the regions. It
 // implements a packed array of Counters. Each counter occupies 2^N bits, enough
 // to store counter's MaxValue. Ctor will try to use a static buffer first, and
@@ -76,11 +173,7 @@
   ~RegionPageMap() {
     if (!isAllocated())
       return;
-    if (Buffer == &StaticBuffer[0])
-      Mutex.unlock();
-    else
-      unmap(reinterpret_cast<void *>(Buffer),
-            roundUp(BufferSize, getPageSizeCached()));
+    Buffers.releaseBuffer(Buffer, BufferSize);
     Buffer = nullptr;
   }
 
@@ -88,8 +181,7 @@
   // specify the thread-safety attribute properly in current code structure.
   // Besides, it's the only place we may want to check thread safety. Therefore,
   // it's fine to bypass the thread-safety analysis now.
-  void reset(uptr NumberOfRegion, uptr CountersPerRegion,
-             uptr MaxValue) NO_THREAD_SAFETY_ANALYSIS {
+  void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) {
     DCHECK_GT(NumberOfRegion, 0);
     DCHECK_GT(CountersPerRegion, 0);
     DCHECK_GT(MaxValue, 0);
@@ -115,21 +207,8 @@
         roundUp(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >>
         PackingRatioLog;
     BufferSize = SizePerRegion * sizeof(*Buffer) * Regions;
-    if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) &&
-        Mutex.tryLock()) {
-      Buffer = &StaticBuffer[0];
-      memset(Buffer, 0, BufferSize);
-    } else {
-      // When using a heap-based buffer, precommit the pages backing the
-      // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization
-      // where page fault exceptions are skipped as the allocated memory
-      // is accessed.
-      const uptr MmapFlags =
-          MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0);
-      Buffer = reinterpret_cast<uptr *>(
-          map(nullptr, roundUp(BufferSize, getPageSizeCached()),
-              "scudo:counters", MmapFlags, &MapData));
-    }
+    Buffer = Buffers.getBuffer(BufferSize);
+    DCHECK_NE(Buffer, nullptr);
   }
 
   bool isAllocated() const { return !!Buffer; }
@@ -206,8 +285,6 @@
 
   uptr getBufferSize() const { return BufferSize; }
 
-  static const uptr StaticBufferCount = 2048U;
-
 private:
   uptr Regions;
   uptr NumCounters;
@@ -219,10 +296,15 @@
   uptr SizePerRegion;
   uptr BufferSize;
   uptr *Buffer;
-  [[no_unique_address]] MapPlatformData MapData = {};
 
-  static HybridMutex Mutex;
-  static uptr StaticBuffer[StaticBufferCount] GUARDED_BY(Mutex);
+  // We may consider making this configurable if there are cases which may
+  // benefit from this.
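+  // Note that the `BufferSize` passed to getBuffer() is in bytes, so with
+  // these values a page map of up to 512 bytes is served from one of the two
+  // static buffers; anything larger falls back to map().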
+  static const uptr StaticBufferCount = 2U;
+  static const uptr StaticBufferSize = 512U;
+  static BufferPool<StaticBufferCount, StaticBufferSize> Buffers;
 };
 
 template <class ReleaseRecorderT> class FreePagesRangeTracker {
diff --git a/compiler-rt/lib/scudo/standalone/release.cpp b/compiler-rt/lib/scudo/standalone/release.cpp
--- a/compiler-rt/lib/scudo/standalone/release.cpp
+++ b/compiler-rt/lib/scudo/standalone/release.cpp
@@ -10,7 +10,7 @@
 
 namespace scudo {
 
-HybridMutex RegionPageMap::Mutex = {};
-uptr RegionPageMap::StaticBuffer[RegionPageMap::StaticBufferCount];
+BufferPool<RegionPageMap::StaticBufferCount, RegionPageMap::StaticBufferSize>
+    RegionPageMap::Buffers;
 
 } // namespace scudo
diff --git a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
@@ -562,3 +562,38 @@
   testReleasePartialRegion();
   testReleasePartialRegion();
 }
+
+TEST(ScudoReleaseTest, BufferPool) {
+  constexpr scudo::uptr StaticBufferCount = SCUDO_WORDSIZE - 1;
+  constexpr scudo::uptr StaticBufferSize = 512U;
+  scudo::BufferPool<StaticBufferCount, StaticBufferSize> Pool;
+
+  std::vector<std::pair<scudo::uptr *, scudo::uptr>> Buffers;
+  for (scudo::uptr I = 0; I < StaticBufferCount; ++I) {
+    scudo::uptr *P = Pool.getBuffer(StaticBufferSize);
+    EXPECT_TRUE(Pool.isStaticBufferTestOnly(P, StaticBufferSize));
+    Buffers.emplace_back(P, StaticBufferSize);
+  }
+
+  // The static buffers are supposed to be used up.
+  scudo::uptr *P = Pool.getBuffer(StaticBufferSize);
+  EXPECT_FALSE(Pool.isStaticBufferTestOnly(P, StaticBufferSize));
+
+  Pool.releaseBuffer(P, StaticBufferSize);
+  for (auto &Buffer : Buffers)
+    Pool.releaseBuffer(Buffer.first, Buffer.second);
+}
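+
+// A supplementary sketch: a request larger than `StaticBufferSize` should
+// bypass the static buffers entirely and be served by map()/unmap().
+TEST(ScudoReleaseTest, BufferPoolDynamic) {
+  constexpr scudo::uptr StaticBufferCount = 2U;
+  constexpr scudo::uptr StaticBufferSize = 512U;
+  scudo::BufferPool<StaticBufferCount, StaticBufferSize> Pool;
+
+  // One byte over the static capacity forces the dynamic path.
+  scudo::uptr *P = Pool.getBuffer(StaticBufferSize + 1U);
+  EXPECT_NE(P, nullptr);
+  EXPECT_FALSE(Pool.isStaticBufferTestOnly(P, StaticBufferSize + 1U));
+  Pool.releaseBuffer(P, StaticBufferSize + 1U);
+}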