diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -467,28 +467,33 @@
       }
     }
 
-    // TODO(kostyak): currently not ideal as we loop over all regions and
-    // iterate multiple times over the same freelist if a ClassId spans multiple
-    // regions. But it will have to do for now.
-    uptr TotalReleasedBytes = 0;
-    const uptr MaxSize = (RegionSize / BlockSize) * BlockSize;
+    DCHECK_GT(MinRegionIndex, 0U);
+    uptr First = 0;
     for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) {
       if (PossibleRegions[I] - 1U == ClassId) {
-        const uptr Region = I * RegionSize;
-        // If the region is the one currently associated to the size-class, we
-        // only need to release up to CurrentRegionAllocated, MaxSize otherwise.
-        const uptr Size = (Region == Sci->CurrentRegion)
-                              ? Sci->CurrentRegionAllocated
-                              : MaxSize;
-        ReleaseRecorder Recorder(Region);
-        releaseFreeMemoryToOS(Sci->FreeList, Region, Size, BlockSize,
-                              &Recorder);
-        if (Recorder.getReleasedRangesCount() > 0) {
-          Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
-          Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
-          Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
-          TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes;
-        }
+        First = I;
+        break;
+      }
+    }
+    uptr Last = 0;
+    for (uptr I = MaxRegionIndex; I >= MinRegionIndex; I--) {
+      if (PossibleRegions[I] - 1U == ClassId) {
+        Last = I;
+        break;
+      }
+    }
+    uptr TotalReleasedBytes = 0;
+    if (First && Last) {
+      const uptr Base = First * RegionSize;
+      const uptr NumberOfRegions = Last - First + 1U;
+      ReleaseRecorder Recorder(Base);
+      releaseFreeMemoryToOS(Sci->FreeList, Base, RegionSize, NumberOfRegions,
+                            BlockSize, &Recorder);
+      if (Recorder.getReleasedRangesCount() > 0) {
+        Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
+        Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
+        Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+        TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes;
       }
     }
     Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -481,7 +481,7 @@
 
     ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
     releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg,
-                          Region->AllocatedUser, BlockSize, &Recorder);
+                          Region->AllocatedUser, 1U, BlockSize, &Recorder);
 
     if (Recorder.getReleasedRangesCount() > 0) {
       Region->ReleaseInfo.PushedBlocksAtLastRelease =
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -49,7 +49,10 @@
 // incremented past MaxValue.
 class PackedCounterArray {
 public:
-  PackedCounterArray(uptr NumCounters, uptr MaxValue) : N(NumCounters) {
+  PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion,
+                     uptr MaxValue)
+      : Regions(NumberOfRegions), NumCounters(CountersPerRegion) {
+    CHECK_GT(Regions, 0);
     CHECK_GT(NumCounters, 0);
     CHECK_GT(MaxValue, 0);
     constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL;
@@ -66,9 +69,10 @@
     PackingRatioLog = getLog2(PackingRatio);
     BitOffsetMask = PackingRatio - 1;
 
-    BufferSize = (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >>
-                  PackingRatioLog) *
-                 sizeof(*Buffer);
+    SizePerRegion =
+        roundUpTo(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >>
+        PackingRatioLog;
+    BufferSize = SizePerRegion * sizeof(*Buffer) * Regions;
     if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) &&
         Mutex.tryLock()) {
       Buffer = &StaticBuffer[0];
@@ -89,41 +93,45 @@
 
   bool isAllocated() const { return !!Buffer; }
 
-  uptr getCount() const { return N; }
+  uptr getCount() const { return NumCounters; }
 
-  uptr get(uptr I) const {
-    DCHECK_LT(I, N);
+  uptr get(uptr Region, uptr I) const {
+    DCHECK_LT(Region, Regions);
+    DCHECK_LT(I, NumCounters);
     const uptr Index = I >> PackingRatioLog;
     const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog;
-    return (Buffer[Index] >> BitOffset) & CounterMask;
+    return (Buffer[Region * SizePerRegion + Index] >> BitOffset) & CounterMask;
   }
 
-  void inc(uptr I) const {
-    DCHECK_LT(get(I), CounterMask);
+  void inc(uptr Region, uptr I) const {
+    DCHECK_LT(get(Region, I), CounterMask);
     const uptr Index = I >> PackingRatioLog;
     const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog;
     DCHECK_LT(BitOffset, SCUDO_WORDSIZE);
-    Buffer[Index] += static_cast<uptr>(1U) << BitOffset;
+    Buffer[Region * SizePerRegion + Index] += static_cast<uptr>(1U)
+                                              << BitOffset;
   }
 
-  void incRange(uptr From, uptr To) const {
+  void incRange(uptr Region, uptr From, uptr To) const {
     DCHECK_LE(From, To);
-    const uptr Top = Min(To + 1, N);
+    const uptr Top = Min(To + 1, NumCounters);
     for (uptr I = From; I < Top; I++)
-      inc(I);
+      inc(Region, I);
   }
 
   uptr getBufferSize() const { return BufferSize; }
 
-  static const uptr StaticBufferCount = 1024U;
+  static const uptr StaticBufferCount = 2048U;
 
 private:
-  const uptr N;
+  const uptr Regions;
+  const uptr NumCounters;
   uptr CounterSizeBitsLog;
   uptr CounterMask;
   uptr PackingRatioLog;
   uptr BitOffsetMask;
 
+  uptr SizePerRegion;
   uptr BufferSize;
   uptr *Buffer;
 
@@ -169,7 +177,8 @@
 template <class TransferBatchT, class ReleaseRecorderT>
 NOINLINE void
 releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
-                      uptr Size, uptr BlockSize, ReleaseRecorderT *Recorder) {
+                      uptr RegionSize, uptr NumberOfRegions, uptr BlockSize,
+                      ReleaseRecorderT *Recorder) {
   const uptr PageSize = getPageSizeCached();
 
   // Figure out the number of chunks per page and whether we can take a fast
@@ -206,13 +215,15 @@
     }
   }
 
-  const uptr PagesCount = roundUpTo(Size, PageSize) / PageSize;
-  PackedCounterArray Counters(PagesCount, FullPagesBlockCountMax);
+  const uptr PagesCount = roundUpTo(RegionSize, PageSize) / PageSize;
+  PackedCounterArray Counters(NumberOfRegions, PagesCount,
+                              FullPagesBlockCountMax);
   if (!Counters.isAllocated())
     return;
 
   const uptr PageSizeLog = getLog2(PageSize);
-  const uptr RoundedSize = PagesCount << PageSizeLog;
+  const uptr RoundedRegionSize = PagesCount << PageSizeLog;
+  const uptr RoundedSize = NumberOfRegions * RoundedRegionSize;
 
   // Iterate over free chunks and count how many free chunks affect each
   // allocated page.
@@ -228,14 +239,17 @@
       for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize)
-          Counters.inc(P >> PageSizeLog);
+        if (P < RoundedSize) {
+          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+          const uptr PInRegion = P - RegionIndex * RegionSize;
+          Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
+        }
       }
     }
-    for (uptr P = Size; P < RoundedSize; P += BlockSize)
-      Counters.inc(P >> PageSizeLog);
   } else {
     // In all other cases chunks might affect more than one page.
+    DCHECK_GE(RegionSize, BlockSize);
+    const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
     for (const auto &It : FreeList) {
       // See TransferBatch comment above.
       const bool IsTransferBatch =
@@ -244,13 +258,24 @@
       for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
         const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
         // This takes care of P < Base and P >= Base + RoundedSize.
-        if (P < RoundedSize)
-          Counters.incRange(P >> PageSizeLog,
-                            (P + BlockSize - 1) >> PageSizeLog);
+        if (P < RoundedSize) {
+          const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+          uptr PInRegion = P - RegionIndex * RegionSize;
+          Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                            (PInRegion + BlockSize - 1) >> PageSizeLog);
+          // The last block in a region might straddle a page, so if it's
+          // free, we mark the following "pretend" memory block(s) as free.
+          if (PInRegion == LastBlockInRegion) {
+            PInRegion += BlockSize;
+            while (PInRegion < RoundedRegionSize) {
+              Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+                                (PInRegion + BlockSize - 1) >> PageSizeLog);
+              PInRegion += BlockSize;
+            }
+          }
+        }
       }
     }
-    for (uptr P = Size; P < RoundedSize; P += BlockSize)
-      Counters.incRange(P >> PageSizeLog, (P + BlockSize - 1) >> PageSizeLog);
   }
 
   // Iterate over pages detecting ranges of pages with chunk Counters equal
@@ -258,8 +283,10 @@
   FreePagesRangeTracker<ReleaseRecorderT> RangeTracker(Recorder);
   if (SameBlockCountPerPage) {
     // Fast path, every page has the same number of chunks affecting it.
-    for (uptr I = 0; I < Counters.getCount(); I++)
-      RangeTracker.processNextPage(Counters.get(I) == FullPagesBlockCountMax);
+    for (uptr I = 0; I < NumberOfRegions; I++)
+      for (uptr J = 0; J < PagesCount; J++)
+        RangeTracker.processNextPage(Counters.get(I, J) ==
+                                     FullPagesBlockCountMax);
   } else {
     // Slow path, go through the pages keeping count how many chunks affect
     // each page.
@@ -270,23 +297,25 @@
     // except the first and the last one) and then the last chunk size, adding
     // up the number of chunks on the current page and checking on every step
     // whether the page boundary was crossed.
-    uptr PrevPageBoundary = 0;
-    uptr CurrentBoundary = 0;
-    for (uptr I = 0; I < Counters.getCount(); I++) {
-      const uptr PageBoundary = PrevPageBoundary + PageSize;
-      uptr BlocksPerPage = Pn;
-      if (CurrentBoundary < PageBoundary) {
-        if (CurrentBoundary > PrevPageBoundary)
-          BlocksPerPage++;
-        CurrentBoundary += Pnc;
+    for (uptr I = 0; I < NumberOfRegions; I++) {
+      uptr PrevPageBoundary = 0;
+      uptr CurrentBoundary = 0;
+      for (uptr J = 0; J < PagesCount; J++) {
+        const uptr PageBoundary = PrevPageBoundary + PageSize;
+        uptr BlocksPerPage = Pn;
         if (CurrentBoundary < PageBoundary) {
-          BlocksPerPage++;
-          CurrentBoundary += BlockSize;
+          if (CurrentBoundary > PrevPageBoundary)
+            BlocksPerPage++;
+          CurrentBoundary += Pnc;
+          if (CurrentBoundary < PageBoundary) {
+            BlocksPerPage++;
+            CurrentBoundary += BlockSize;
+          }
         }
-      }
-      PrevPageBoundary = PageBoundary;
+        PrevPageBoundary = PageBoundary;
 
-      RangeTracker.processNextPage(Counters.get(I) == BlocksPerPage);
+        RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage);
+      }
     }
   }
   RangeTracker.finish();
diff --git a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
@@ -21,14 +21,14 @@
 TEST(ScudoReleaseTest, PackedCounterArray) {
   for (scudo::uptr I = 0; I < SCUDO_WORDSIZE; I++) {
     // Various valid counter's max values packed into one word.
-    scudo::PackedCounterArray Counters2N(1, 1UL << I);
+    scudo::PackedCounterArray Counters2N(1U, 1U, 1UL << I);
     EXPECT_EQ(sizeof(scudo::uptr), Counters2N.getBufferSize());
     // Check the "all bit set" values too.
-    scudo::PackedCounterArray Counters2N1_1(1, ~0UL >> I);
+    scudo::PackedCounterArray Counters2N1_1(1U, 1U, ~0UL >> I);
     EXPECT_EQ(sizeof(scudo::uptr), Counters2N1_1.getBufferSize());
     // Verify the packing ratio, the counter is Expected to be packed into the
     // closest power of 2 bits.
-    scudo::PackedCounterArray Counters(SCUDO_WORDSIZE, 1UL << I);
+    scudo::PackedCounterArray Counters(1U, SCUDO_WORDSIZE, 1UL << I);
     EXPECT_EQ(sizeof(scudo::uptr) * scudo::roundUpToPowerOfTwo(I + 1),
               Counters.getBufferSize());
   }
@@ -38,19 +38,20 @@
     // Make sure counters request one memory page for the buffer.
     const scudo::uptr NumCounters =
         (scudo::getPageSizeCached() / 8) * (SCUDO_WORDSIZE >> I);
-    scudo::PackedCounterArray Counters(NumCounters, 1UL << ((1UL << I) - 1));
-    Counters.inc(0);
+    scudo::PackedCounterArray Counters(1U, NumCounters,
+                                       1UL << ((1UL << I) - 1));
+    Counters.inc(0U, 0U);
     for (scudo::uptr C = 1; C < NumCounters - 1; C++) {
-      EXPECT_EQ(0UL, Counters.get(C));
-      Counters.inc(C);
-      EXPECT_EQ(1UL, Counters.get(C - 1));
+      EXPECT_EQ(0UL, Counters.get(0U, C));
+      Counters.inc(0U, C);
+      EXPECT_EQ(1UL, Counters.get(0U, C - 1));
     }
-    EXPECT_EQ(0UL, Counters.get(NumCounters - 1));
-    Counters.inc(NumCounters - 1);
+    EXPECT_EQ(0UL, Counters.get(0U, NumCounters - 1));
+    Counters.inc(0U, NumCounters - 1);
     if (I > 0) {
-      Counters.incRange(0, NumCounters - 1);
+      Counters.incRange(0U, 0U, NumCounters - 1);
       for (scudo::uptr C = 0; C < NumCounters; C++)
-        EXPECT_EQ(2UL, Counters.get(C));
+        EXPECT_EQ(2UL, Counters.get(0U, C));
     }
   }
 }
@@ -190,7 +191,7 @@
 
   // Release the memory.
   ReleasedPagesRecorder Recorder;
-  releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, BlockSize,
+  releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, 1U, BlockSize,
                         &Recorder);
 
   // Verify that there are no released pages touched by used chunks and all