diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -76,17 +76,12 @@ if (UNLIKELY(!getRandom(reinterpret_cast(&Seed), sizeof(Seed)))) Seed = static_cast( Time ^ (reinterpret_cast(SizeClassInfoArray) >> 6)); - const uptr PageSize = getPageSizeCached(); for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); Sci->RandState = getRandomU32(&Seed); // Sci->MaxRegionIndex is already initialized to 0. Sci->MinRegionIndex = NumRegions; - // See comment in the 64-bit primary about releasing smaller size classes. - Sci->CanRelease = (I != SizeClassMap::BatchClassId) && - (getSizeByClassId(I) >= (PageSize / 32)); - if (Sci->CanRelease) - Sci->ReleaseInfo.LastReleaseAtNs = Time; + Sci->ReleaseInfo.LastReleaseAtNs = Time; } setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } @@ -137,7 +132,7 @@ ScopedLock L(Sci->Mutex); Sci->FreeList.push_front(B); Sci->Stats.PushedBlocks += B->getCount(); - if (Sci->CanRelease) + if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Sci, ClassId); } @@ -217,6 +212,8 @@ uptr releaseToOS() { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true); diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -80,17 +80,7 @@ Region->RegionBeg = getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize; Region->RandState = getRandomU32(&Seed); - // Releasing smaller size classes doesn't necessarily yield to a - // meaningful RSS impact: there are more blocks per page, they are - // randomized around, and thus pages are less likely to be entirely empty. - // On top of this, attempting to release those require more iterations and - // memory accesses which ends up being fairly costly. The current lower - // limit is mostly arbitrary and based on empirical observations. - // TODO(kostyak): make the lower limit a runtime option - Region->CanRelease = (I != SizeClassMap::BatchClassId) && - (getSizeByClassId(I) >= (PageSize / 32)); - if (Region->CanRelease) - Region->ReleaseInfo.LastReleaseAtNs = Time; + Region->ReleaseInfo.LastReleaseAtNs = Time; } setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); @@ -129,7 +119,7 @@ ScopedLock L(Region->Mutex); Region->FreeList.push_front(B); Region->Stats.PushedBlocks += B->getCount(); - if (Region->CanRelease) + if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Region, ClassId); } @@ -201,6 +191,8 @@ uptr releaseToOS() { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { + if (I == SizeClassMap::BatchClassId) + continue; RegionInfo *Region = getRegionInfo(I); ScopedLock L(Region->Mutex); TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true); @@ -291,7 +283,6 @@ HybridMutex Mutex; SinglyLinkedList FreeList; RegionStats Stats; - bool CanRelease; bool Exhausted; u32 RandState; uptr RegionBeg; @@ -417,7 +408,7 @@ const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); - CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks); + DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks); const uptr BytesInFreeList = Region->AllocatedUser - (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h --- a/compiler-rt/lib/scudo/standalone/release.h +++ b/compiler-rt/lib/scudo/standalone/release.h @@ -52,20 +52,20 @@ PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion, uptr MaxValue) : Regions(NumberOfRegions), NumCounters(CountersPerRegion) { - CHECK_GT(Regions, 0); - CHECK_GT(NumCounters, 0); - CHECK_GT(MaxValue, 0); + DCHECK_GT(Regions, 0); + DCHECK_GT(NumCounters, 0); + DCHECK_GT(MaxValue, 0); constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL; // Rounding counter storage size up to the power of two allows for using // bit shifts calculating particular counter's Index and offset. const uptr CounterSizeBits = roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1); - CHECK_LE(CounterSizeBits, MaxCounterBits); + DCHECK_LE(CounterSizeBits, MaxCounterBits); CounterSizeBitsLog = getLog2(CounterSizeBits); CounterMask = ~(static_cast(0)) >> (MaxCounterBits - CounterSizeBits); const uptr PackingRatio = MaxCounterBits >> CounterSizeBitsLog; - CHECK_GT(PackingRatio, 0); + DCHECK_GT(PackingRatio, 0); PackingRatioLog = getLog2(PackingRatio); BitOffsetMask = PackingRatio - 1; @@ -235,20 +235,14 @@ if (BlockSize <= PageSize && PageSize % BlockSize == 0) { // Each chunk affects one page only. for (const auto &It : FreeList) { - // If dealing with a TransferBatch, the first pointer of the batch will - // point to the batch itself, we do not want to mark this for release as - // the batch is in use, so skip the first entry. - const bool IsTransferBatch = - (It.getCount() != 0) && - (reinterpret_cast(It.get(0)) == reinterpret_cast(&It)); - for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { + for (u32 I = 0; I < It.getCount(); I++) { const uptr P = reinterpret_cast(It.get(I)) - Base; // This takes care of P < Base and P >= Base + RoundedSize. - if (P < RoundedSize) { - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - const uptr PInRegion = P - RegionIndex * RegionSize; - Counters.inc(RegionIndex, PInRegion >> PageSizeLog); - } + if (UNLIKELY(P >= RoundedSize)) + continue; + const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; + const uptr PInRegion = P - RegionIndex * RegionSize; + Counters.inc(RegionIndex, PInRegion >> PageSizeLog); } } } else { @@ -256,27 +250,23 @@ DCHECK_GE(RegionSize, BlockSize); const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize; for (const auto &It : FreeList) { - // See TransferBatch comment above. - const bool IsTransferBatch = - (It.getCount() != 0) && - (reinterpret_cast(It.get(0)) == reinterpret_cast(&It)); - for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) { + for (u32 I = 0; I < It.getCount(); I++) { const uptr P = reinterpret_cast(It.get(I)) - Base; // This takes care of P < Base and P >= Base + RoundedSize. - if (P < RoundedSize) { - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - uptr PInRegion = P - RegionIndex * RegionSize; - Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, - (PInRegion + BlockSize - 1) >> PageSizeLog); - // The last block in a region might straddle a page, so if it's - // free, we mark the following "pretend" memory block(s) as free. - if (PInRegion == LastBlockInRegion) { + if (UNLIKELY(P >= RoundedSize)) + continue; + const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; + uptr PInRegion = P - RegionIndex * RegionSize; + Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, + (PInRegion + BlockSize - 1) >> PageSizeLog); + // The last block in a region might straddle a page, so if it's + // free, we mark the following "pretend" memory block(s) as free. + if (PInRegion == LastBlockInRegion) { + PInRegion += BlockSize; + while (PInRegion < RoundedRegionSize) { + Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, + (PInRegion + BlockSize - 1) >> PageSizeLog); PInRegion += BlockSize; - while (PInRegion < RoundedRegionSize) { - Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, - (PInRegion + BlockSize - 1) >> PageSizeLog); - PInRegion += BlockSize; - } } } } @@ -327,7 +317,6 @@ } } PrevPageBoundary = PageBoundary; - RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage); } }