diff --git a/compiler-rt/lib/scudo/standalone/list.h b/compiler-rt/lib/scudo/standalone/list.h
--- a/compiler-rt/lib/scudo/standalone/list.h
+++ b/compiler-rt/lib/scudo/standalone/list.h
@@ -55,6 +55,7 @@
   ConstIterator end() const { return ConstIterator(nullptr); }
 
   void checkConsistency() const;
+  void sort(bool (*compare)(T *A, T *B));
 
 protected:
   uptr Size;
@@ -78,6 +79,42 @@
   }
 }
 
+// Standard bubble sort; the pointers are swapped rather than the data.
+template <class T> void IntrusiveList<T>::sort(bool (*compare)(T *A, T *B)) {
+  bool Swapped;
+  T *I, *End = nullptr;
+  do {
+    Swapped = false;
+    I = First;
+    T *Before = nullptr;
+    while (I->Next != End) {
+      T *After = I->Next;
+      if (compare(I, After)) {
+        if (First == I) {
+          DCHECK_EQ(Before, nullptr);
+          First = After;
+        }
+        if (Last == After) {
+          DCHECK_EQ(After->Next, nullptr);
+          Last = I;
+        }
+        if (Before) {
+          DCHECK_EQ(Before->Next, I);
+          Before->Next = After;
+        }
+        I->Next = After->Next;
+        After->Next = I;
+        Before = After;
+        Swapped = true;
+      } else {
+        Before = I;
+        I = After;
+      }
+    }
+    End = I;
+  } while (Swapped);
+}
+
 template <class T> struct SinglyLinkedList : public IntrusiveList<T> {
   using IntrusiveList<T>::First;
   using IntrusiveList<T>::Last;
diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -23,9 +23,10 @@
     void setFromArray(void **Array, u32 N) {
       DCHECK_LE(N, MaxNumCached);
       Count = N;
+      ReleaseHint = 0;
       memcpy(Batch, Array, sizeof(void *) * Count);
     }
-    void clear() { Count = 0; }
+    void clear() { ReleaseHint = Count = 0; }
     void add(void *P) {
       DCHECK_LT(Count, MaxNumCached);
       Batch[Count++] = P;
@@ -41,10 +42,18 @@
     static u32 getMaxCached(uptr Size) {
       return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size));
     }
 
+    void incReleaseHint() {
+      ReleaseHint++;
+      DCHECK_LE(ReleaseHint, MaxNumCached);
+    }
+    u32 getReleaseHint() const { return ReleaseHint; }
+    void clearReleaseHint() { ReleaseHint = 0; }
+
     TransferBatch *Next;
 
   private:
     u32 Count;
+    u32 ReleaseHint;
     void *Batch[MaxNumCached];
   };
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -379,6 +379,7 @@
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();
+    const bool SortBatches = static_cast<bool>(SCUDO_ANDROID);
 
     CHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks);
     const uptr BytesInFreeList =
@@ -403,6 +404,11 @@
       }
     }
 
+    if (SortBatches) {
+      for (auto &It : Sci->FreeList)
+        It.clearReleaseHint();
+    }
+
     // TODO(kostyak): currently not ideal as we loop over all regions and
     // iterate multiple times over the same freelist if a ClassId spans multiple
     // regions. But it will have to do for now.
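For illustration, here is a minimal standalone sketch of the pointer-relinking bubble sort that the list.h hunk above introduces. Node, sortList, and the driver below are hypothetical stand-ins, not scudo identifiers; the relinking steps mirror IntrusiveList<T>::sort().

#include <cassert>
#include <cstdio>

// Hypothetical intrusive node; scudo's TransferBatch exposes the same
// Next link.
struct Node {
  int Key;
  Node *Next;
};

// Pointer-relinking bubble sort: nodes are re-threaded instead of copying
// payloads, and End retreats by one node per outer pass because the largest
// remaining element has bubbled into its final position.
void sortList(Node *&First, Node *&Last, bool (*compare)(Node *, Node *)) {
  if (First == nullptr || First == Last)
    return;
  bool Swapped;
  Node *End = nullptr;
  do {
    Swapped = false;
    Node *Before = nullptr;
    Node *I = First;
    while (I->Next != End) {
      Node *After = I->Next;
      if (compare(I, After)) { // True means I belongs after After: swap.
        if (First == I)
          First = After;
        if (Last == After)
          Last = I;
        if (Before)
          Before->Next = After;
        I->Next = After->Next;
        After->Next = I;
        Before = After;
        Swapped = true;
      } else {
        Before = I;
        I = After;
      }
    }
    End = I;
  } while (Swapped);
}

int main() {
  Node N[4] = {{3, &N[1]}, {1, &N[2]}, {4, &N[3]}, {2, nullptr}};
  Node *First = &N[0], *Last = &N[3];
  // A captureless lambda decays to the function pointer the sort expects,
  // the same shape as the release-hint comparator in primary{32,64}.h.
  sortList(First, Last, [](Node *A, Node *B) { return A->Key > B->Key; });
  for (Node *P = First; P != nullptr; P = P->Next)
    printf("%d ", P->Key); // Prints: 1 2 3 4
  assert(Last->Key == 4 && Last->Next == nullptr);
  return 0;
}

Note the comparator convention: compare(I, After) returning true requests a swap, so the list settles into the order where compare() is false for every adjacent pair. The getReleaseHint() > getReleaseHint() comparator used in primary32.h and primary64.h below therefore leaves batches in ascending hint order, with the least-reclaimed batches at the front of the free list where, assuming batches are popped from the front, they are reused first.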
@@ -412,7 +418,7 @@
       const uptr Region = I * RegionSize;
       ReleaseRecorder Recorder(Region);
       releaseFreeMemoryToOS(Sci->FreeList, Region, RegionSize / PageSize,
-                            BlockSize, &Recorder);
+                            BlockSize, &Recorder, SortBatches);
       if (Recorder.getReleasedRangesCount() > 0) {
         Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
         Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
@@ -421,6 +427,12 @@
         }
       }
     }
+    if (SortBatches) {
+      Sci->FreeList.sort([](TransferBatch *A, TransferBatch *B) -> bool {
+        return A->getReleaseHint() > B->getReleaseHint();
+      });
+    }
+
     Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
     return TotalReleasedBytes;
   }
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -388,6 +388,11 @@
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();
+    // TODO(kostyak): currently only enabled on Android; make it configurable.
+    // Sorting the batches by how "reclaimed" they are reduces the range of
+    // memory we are working with, which has consequences for the
+    // predictability of allocation patterns.
+    const bool SortBatches = static_cast<bool>(SCUDO_ANDROID);
 
     CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
     const uptr BytesInFreeList =
@@ -413,16 +418,26 @@
       }
     }
 
+    if (SortBatches) {
+      for (auto &It : Region->FreeList)
+        It.clearReleaseHint();
+    }
+
     ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
     releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg,
                           roundUpTo(Region->AllocatedUser, PageSize) / PageSize,
-                          BlockSize, &Recorder);
+                          BlockSize, &Recorder, SortBatches);
     if (Recorder.getReleasedRangesCount() > 0) {
       Region->ReleaseInfo.PushedBlocksAtLastRelease =
           Region->Stats.PushedBlocks;
       Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
       Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+      if (SortBatches) {
+        Region->FreeList.sort([](TransferBatch *A, TransferBatch *B) -> bool {
+          return A->getReleaseHint() > B->getReleaseHint();
+        });
+      }
     }
     Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
     return Recorder.getReleasedBytes();
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -66,9 +66,12 @@
     PackingRatioLog = getLog2(PackingRatio);
     BitOffsetMask = PackingRatio - 1;
 
-    BufferSize = (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >>
-                  PackingRatioLog) *
-                 sizeof(*Buffer);
+    const uptr PackedCountersSize =
+        (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >>
+         PackingRatioLog) *
+        sizeof(*Buffer);
+    const uptr MetaDataSize = roundUpTo(N, 8U) / 8U;
+    BufferSize = PackedCountersSize + MetaDataSize;
     if (BufferSize <= StaticBufferSize && Mutex.tryLock()) {
       Buffer = &StaticBuffer[0];
       memset(Buffer, 0, BufferSize);
@@ -76,6 +79,8 @@
       Buffer = reinterpret_cast<uptr *>(
           map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM));
     }
+    MetaData = reinterpret_cast<u8 *>(reinterpret_cast<uptr>(Buffer) +
+                                      PackedCountersSize);
   }
   ~PackedCounterArray() {
     if (!isAllocated())
@@ -90,6 +95,17 @@
 
   uptr getCount() const { return N; }
 
+  void setMetaData(uptr I) {
+    DCHECK_LT(I, N);
+    DCHECK_EQ(MetaData[I / 8] & (1U << (I & 7)), 0U);
+    MetaData[I / 8] |= static_cast<u8>(1U << (I & 7));
+  }
+
+  u8 getMetaData(uptr I) {
+    DCHECK_LT(I, N);
+    return MetaData[I / 8] & (1U << (I & 7)) ? 1U : 0U;
+  }
+
   uptr get(uptr I) const {
     DCHECK_LT(I, N);
     const uptr Index = I >> PackingRatioLog;
@@ -111,7 +127,9 @@
     inc(I);
   }
 
-  uptr getBufferSize() const { return BufferSize; }
+  uptr getPackedCountersSize() const {
+    return BufferSize - roundUpTo(N, 8U) / 8U;
+  }
 
 private:
   const uptr N;
@@ -122,10 +140,12 @@
 
   uptr BufferSize;
   uptr *Buffer;
+  u8 *MetaData;
 
   static HybridMutex Mutex;
   static const uptr StaticBufferSize = 1024U;
   static uptr StaticBuffer[StaticBufferSize];
+  // MetaData holds an extra bit per counter.
 };
 
 template <class ReleaseRecorderT> class FreePagesRangeTracker {
@@ -167,7 +187,8 @@
 NOINLINE void
 releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
                       uptr AllocatedPagesCount, uptr BlockSize,
-                      ReleaseRecorderT *Recorder) {
+                      ReleaseRecorderT *Recorder,
+                      bool UpdateReleaseHint = false) {
   const uptr PageSize = getPageSizeCached();
 
   // Figure out the number of chunks per page and whether we can take a fast
@@ -249,8 +270,12 @@
   FreePagesRangeTracker<ReleaseRecorderT> RangeTracker(Recorder);
   if (SameBlockCountPerPage) {
     // Fast path, every page has the same number of chunks affecting it.
-    for (uptr I = 0; I < Counters.getCount(); I++)
-      RangeTracker.processNextPage(Counters.get(I) == FullPagesBlockCountMax);
+    for (uptr I = 0; I < Counters.getCount(); I++) {
+      const bool Freed = Counters.get(I) == FullPagesBlockCountMax;
+      if (UpdateReleaseHint && Freed)
+        Counters.setMetaData(I);
+      RangeTracker.processNextPage(Freed);
+    }
   } else {
     // Slow path, go through the pages keeping count how many chunks affect
     // each page.
@@ -277,10 +302,26 @@
       }
       PrevPageBoundary = PageBoundary;
-      RangeTracker.processNextPage(Counters.get(I) == BlocksPerPage);
+      const bool Freed = Counters.get(I) == BlocksPerPage;
+      if (UpdateReleaseHint && Freed)
+        Counters.setMetaData(I);
+      RangeTracker.processNextPage(Freed);
     }
   }
   RangeTracker.finish();
+
+  if (!UpdateReleaseHint || Recorder->getReleasedRangesCount() == 0)
+    return;
+  for (auto &It : const_cast<IntrusiveList<TransferBatchT> &>(FreeList)) {
+    for (u32 I = 0; I < It.getCount(); I++) {
+      const uptr P = reinterpret_cast<uptr>(It.get(I));
+      if (P < Base || P >= End)
+        continue;
+      for (uptr J = P; J <= P + BlockSize - 1; J += PageSize)
+        if (Counters.getMetaData((J - Base) >> PageSizeLog))
+          It.incReleaseHint();
+    }
+  }
 }
 
 } // namespace scudo
diff --git a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
@@ -22,15 +22,15 @@
   for (scudo::uptr I = 0; I < SCUDO_WORDSIZE; I++) {
     // Various valid counter's max values packed into one word.
     scudo::PackedCounterArray Counters2N(1, 1UL << I);
-    EXPECT_EQ(sizeof(scudo::uptr), Counters2N.getBufferSize());
+    EXPECT_EQ(sizeof(scudo::uptr), Counters2N.getPackedCountersSize());
     // Check the "all bit set" values too.
     scudo::PackedCounterArray Counters2N1_1(1, ~0UL >> I);
-    EXPECT_EQ(sizeof(scudo::uptr), Counters2N1_1.getBufferSize());
+    EXPECT_EQ(sizeof(scudo::uptr), Counters2N1_1.getPackedCountersSize());
     // Verify the packing ratio, the counter is expected to be packed into the
     // closest power of 2 bits.
     scudo::PackedCounterArray Counters(SCUDO_WORDSIZE, 1UL << I);
     EXPECT_EQ(sizeof(scudo::uptr) * scudo::roundUpToPowerOfTwo(I + 1),
-              Counters.getBufferSize());
+              Counters.getPackedCountersSize());
   }
 
   // Go through 1, 2, 4, 8, .. {32,64} bits per counter.
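For illustration, a minimal sketch of the one-bit-per-index packing that setMetaData() and getMetaData() implement above: bit I lives in byte I / 8 at position I & 7, and N bits occupy roundUpTo(N, 8U) / 8U bytes, the MetaDataSize appended to BufferSize. BitArray and its names are hypothetical, not scudo code.

#include <cassert>
#include <cstdint>
#include <cstring>

// One metadata bit per counter, stored in a plain byte array.
struct BitArray {
  uint8_t *Bits;
  BitArray(uint8_t *Storage, uint32_t N) : Bits(Storage) {
    memset(Bits, 0, (N + 7) / 8); // Same sizing as roundUpTo(N, 8U) / 8U.
  }
  void set(uint32_t I) { Bits[I / 8] |= static_cast<uint8_t>(1U << (I & 7)); }
  uint8_t get(uint32_t I) const { return (Bits[I / 8] >> (I & 7)) & 1U; }
};

int main() {
  uint8_t Storage[2]; // 16 bits of metadata.
  BitArray Freed(Storage, 16);
  Freed.set(0); // Byte 0, bit 0.
  Freed.set(9); // Byte 1, bit 1.
  assert(Freed.get(0) == 1 && Freed.get(9) == 1 && Freed.get(8) == 0);
  return 0;
}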
@@ -123,6 +123,8 @@
     for (scudo::uptr I = From; I < To; I += PageSize)
       ReportedPages.insert(I);
   }
+
+  scudo::uptr getReleasedRangesCount() const { return ReportedPages.size(); }
 };
 
 // Simplified version of a TransferBatch.
@@ -138,6 +140,10 @@
     DCHECK_LE(I, Count);
     return Batch[I];
   }
+  void incReleaseHint() const {}
+  scudo::u32 getReleaseHint() const { return 0; }
+  void clearReleaseHint() const {}
+
   FreeBatch *Next;
 
 private:
diff --git a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
@@ -28,7 +28,6 @@
   testSizeClassMap();
 }
 
-
 struct OneClassSizeClassConfig {
   static const scudo::uptr NumBits = 1;
   static const scudo::uptr MinSizeLog = 5;
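For illustration, a standalone sketch of the hint-accounting arithmetic at the tail of releaseFreeMemoryToOS(): a block at address P touches every page visited by stepping J through [P, P + BlockSize - 1], and its batch earns one hint per touched page that was just released. All constants and the FreedPage array are hypothetical stand-ins for the real region layout and for Counters.getMetaData().

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t Base = 0x10000;   // Start of the region.
  const uintptr_t PageSize = 4096;
  const uintptr_t PageSizeLog = 12; // getLog2(PageSize).
  const uintptr_t BlockSize = 6144; // 1.5 pages, so blocks straddle pages.
  // Which pages of the region were released; stands in for getMetaData().
  const bool FreedPage[4] = {true, false, true, true};

  // Second block of the region: it covers the second half of page 1 and
  // all of page 2.
  const uintptr_t P = Base + BlockSize;
  uint32_t Hint = 0;
  for (uintptr_t J = P; J <= P + BlockSize - 1; J += PageSize)
    if (FreedPage[(J - Base) >> PageSizeLog])
      Hint++;
  assert(Hint == 1); // Page 1 was not released, page 2 was.
  return 0;
}

Batches whose blocks overlap many released pages accumulate larger hints and, given the ascending order produced by the sort, end up toward the tail of the free list, which is what keeps the reclaimed pages cold.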