diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -35,6 +35,15 @@
       // u16 will be promoted to int by arithmetic type conversion.
       Count = static_cast<u16>(Count + N);
     }
+    void appendFromTransferBatch(TransferBatch *B, u16 N) {
+      DCHECK_LE(N, MaxNumCached - Count);
+      DCHECK_GE(B->Count, N);
+      // Append from the back of `B`.
+      memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
+      // u16 will be promoted to int by arithmetic type conversion.
+      Count = static_cast<u16>(Count + N);
+      B->Count = static_cast<u16>(B->Count - N);
+    }
     void clear() { Count = 0; }
     void add(CompactPtrT P) {
       DCHECK_LT(Count, MaxNumCached);
@@ -44,7 +53,7 @@
       memcpy(Array, Batch, sizeof(Batch[0]) * Count);
     }
     u16 getCount() const { return Count; }
-    CompactPtrT *getRawArray() { return Batch; }
+    bool isEmpty() const { return Count == 0U; }
     CompactPtrT get(u16 I) const {
       DCHECK_LE(I, Count);
       return Batch[I];
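
The appendFromTransferBatch() helper added above moves the last N compact pointers out of a donor batch onto the end of the receiver, so two partially filled batches can be merged without copying any element twice. Below is a minimal, self-contained sketch of the same move-from-the-back idea; MiniBatch, appendFromBack() and the use of uint32_t in place of CompactPtrT are illustrative stand-ins, not Scudo code.

// Simplified stand-in for TransferBatch, with plain asserts instead of DCHECKs.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

struct MiniBatch {
  static constexpr uint16_t MaxNumCached = 8;
  uint32_t Slots[MaxNumCached];
  uint16_t Count = 0;

  void add(uint32_t P) {
    assert(Count < MaxNumCached);
    Slots[Count++] = P;
  }
  // Move the last N elements of `B` onto the end of this batch, mirroring the
  // appendFromTransferBatch() added above: the donor shrinks, the receiver
  // grows, and no element is touched twice.
  void appendFromBack(MiniBatch *B, uint16_t N) {
    assert(N <= MaxNumCached - Count);
    assert(B->Count >= N);
    std::memcpy(Slots + Count, B->Slots + (B->Count - N), sizeof(Slots[0]) * N);
    Count = static_cast<uint16_t>(Count + N);
    B->Count = static_cast<uint16_t>(B->Count - N);
  }
  bool isEmpty() const { return Count == 0; }
};

int main() {
  MiniBatch A, B;
  for (uint32_t I = 0; I < 3; ++I) A.add(I);    // A holds 0, 1, 2
  for (uint32_t I = 10; I < 16; ++I) B.add(I);  // B holds 10..15
  A.appendFromBack(&B, 5);                      // A takes 11..15, B keeps 10
  std::printf("A.Count=%u B.Count=%u\n", static_cast<unsigned>(A.Count),
              static_cast<unsigned>(B.Count));  // prints A.Count=8 B.Count=1
  return 0;
}

Taking elements from the back keeps the donor's remaining elements contiguous at the front, which is what lets the merge logic later in this patch treat only the front TransferBatch of each group as possibly non-full.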
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -984,8 +984,6 @@
   NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
                                  ReleaseToOS ReleaseType = ReleaseToOS::Normal)
       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
-    // TODO(chiahungduan): Release `FLLock` in step 3 & 4 described in the
-    // comment below.
     ScopedLock L(Region->FLLock);
 
     const uptr BlockSize = getSizeByClassId(ClassId);
@@ -1010,10 +1008,6 @@
       return 0;
     }
 
-    // This is only used for debugging to ensure the consistency of the number
-    // of groups.
-    uptr NumberOfBatchGroups = Region->FreeListInfo.BlockList.size();
-
     // ====================================================================== //
     // 2. Determine which groups can release the pages. Use a heuristic to
     //    gather groups that are candidates for doing a release.
@@ -1029,39 +1023,71 @@
     if (GroupsToRelease.empty())
       return 0;
 
-    // ====================================================================== //
-    // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`.
-    //    Then we can tell which pages are in-use by querying
-    //    `PageReleaseContext`.
-    // ====================================================================== //
-    PageReleaseContext Context = markFreeBlocks(
-        Region, BlockSize, AllocatedUserEnd, CompactPtrBase, GroupsToRelease);
-    if (UNLIKELY(!Context.hasBlockMarked())) {
-      mergeGroupsToReleaseBack(Region, GroupsToRelease, NumberOfBatchGroups);
-      return 0;
-    }
-
+    // Ideally, we should use a class like `ScopedUnlock`. However, this form of
+    // unlocking is not supported by the thread-safety analysis. See
+    // https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#no-alias-analysis
+    // for more details.
+    // Put it as local class so that we can mark the ctor/dtor with proper
+    // annotations associated to the target lock. Note that accessing the
+    // function variable in local class only works in thread-safety annotations.
+    // TODO: Implement general `ScopedUnlock` when it's supported.
+    class FLLockScopedUnlock {
+    public:
+      FLLockScopedUnlock(RegionInfo *Region) RELEASE(Region->FLLock)
+          : R(Region) {
+        R->FLLock.assertHeld();
+        R->FLLock.unlock();
+      }
+      ~FLLockScopedUnlock() ACQUIRE(Region->FLLock) { R->FLLock.lock(); }
-    // ====================================================================== //
-    // 4. Release the unused physical pages back to the OS.
-    // ====================================================================== //
-    RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap,
-                                            Region->RegionBeg,
-                                            Context.getReleaseOffset());
-    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
-    releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
-    if (Recorder.getReleasedRangesCount() > 0) {
-      Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
-      Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
-      Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+
+    private:
+      RegionInfo *R;
+    };
+
+    // Note that we have extracted the `GroupsToRelease` from region freelist.
+    // It's safe to let pushBlocks()/popBatches() access the remaining region
+    // freelist. In the steps 3 and 4, we will temporarily release the FLLock
+    // and lock it again before step 5.
+
+    uptr ReleasedBytes = 0;
+    {
+      FLLockScopedUnlock UL(Region);
+      // ==================================================================== //
+      // 3. Mark the free blocks in `GroupsToRelease` in the
+      //    `PageReleaseContext`. Then we can tell which pages are in-use by
+      //    querying `PageReleaseContext`.
+      // ==================================================================== //
+      PageReleaseContext Context = markFreeBlocks(
+          Region, BlockSize, AllocatedUserEnd, CompactPtrBase, GroupsToRelease);
+      if (UNLIKELY(!Context.hasBlockMarked())) {
+        ScopedLock L(Region->FLLock);
+        mergeGroupsToReleaseBack(Region, GroupsToRelease);
+        return 0;
+      }
+
+      // ==================================================================== //
+      // 4. Release the unused physical pages back to the OS.
+      // ==================================================================== //
+      RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap,
+                                              Region->RegionBeg,
+                                              Context.getReleaseOffset());
+      auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
+      releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
+      if (Recorder.getReleasedRangesCount() > 0) {
+        Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
+        Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
+        Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+      }
+      Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast();
+      ReleasedBytes = Recorder.getReleasedBytes();
     }
-    Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast();
 
     // ====================================================================== //
     // 5. Merge the `GroupsToRelease` back to the freelist.
     // ====================================================================== //
-    mergeGroupsToReleaseBack(Region, GroupsToRelease, NumberOfBatchGroups);
+    mergeGroupsToReleaseBack(Region, GroupsToRelease);
 
-    return Recorder.getReleasedBytes();
+    return ReleasedBytes;
   }
 
   bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize,
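
The releaseToOSMaybe() hunk above keeps FLLock only for the cheap bookkeeping (detaching candidate groups in steps 1-2 and re-merging them in step 5) and drops it around the expensive marking and release work in steps 3-4, using the function-local FLLockScopedUnlock so its ctor/dtor can carry the RELEASE/ACQUIRE thread-safety annotations. The following is a minimal sketch of that unlock-around-the-expensive-part shape only; it uses std::mutex instead of Scudo's HybridMutex, omits the annotations, and ScopedUnlock, releaseMaybe() and FreeListSize are illustrative names, not part of the patch.

#include <cstdio>
#include <mutex>

std::mutex FLLock;
int FreeListSize = 100; // stand-in for per-region free-list state

// Inverse of std::lock_guard: unlock on construction, relock on destruction.
class ScopedUnlock {
public:
  explicit ScopedUnlock(std::mutex &M) : M_(M) { M_.unlock(); }
  ~ScopedUnlock() { M_.lock(); }

private:
  std::mutex &M_;
};

int releaseMaybe() {
  std::lock_guard<std::mutex> L(FLLock);
  // Steps 1-2: inspect the free list and detach candidate groups while the
  // lock is held (cheap).
  int Detached = FreeListSize / 2;
  FreeListSize -= Detached;

  int Released = 0;
  {
    ScopedUnlock UL(FLLock);
    // Steps 3-4: mark blocks and return pages to the OS without blocking
    // other threads that push/pop on the remaining free list (expensive).
    Released = Detached;
  }

  // Step 5: merge the detached groups back, again under the lock.
  FreeListSize += Detached;
  return Released;
}

int main() {
  std::printf("released %d\n", releaseMaybe());
  return 0;
}

As the comment in the patch explains, the general ScopedUnlock form trips over the analysis's lack of alias analysis, which is why the patch defines the helper as a local class whose annotations name Region->FLLock directly.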
@@ -1298,7 +1324,7 @@
   markFreeBlocks(RegionInfo *Region, const uptr BlockSize,
                  const uptr AllocatedUserEnd, const uptr CompactPtrBase,
                  SinglyLinkedList<BatchGroup> &GroupsToRelease)
-      REQUIRES(Region->MMLock, Region->FLLock) {
+      REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     const uptr GroupSize = (1U << GroupSizeLog);
     auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
       return decompactPtrInternal(CompactPtrBase, CompactPtr);
@@ -1347,9 +1373,12 @@
                              BG.Batches.front()->getCount();
 
       if (NumBlocks == MaxContainedBlocks) {
-        for (const auto &It : BG.Batches)
+        for (const auto &It : BG.Batches) {
+          if (&It != BG.Batches.front())
+            DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch);
           for (u16 I = 0; I < It.getCount(); ++I)
             DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase);
+        }
 
         Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd,
                                       Region->RegionBeg, /*RegionIndex=*/0,
@@ -1371,9 +1400,22 @@
   }
 
   void mergeGroupsToReleaseBack(RegionInfo *Region,
-                                SinglyLinkedList<BatchGroup> &GroupsToRelease,
-                                const uptr NumberOfBatchGroups)
+                                SinglyLinkedList<BatchGroup> &GroupsToRelease)
       REQUIRES(Region->MMLock, Region->FLLock) {
+    // After merging two freelists, we may have redundant `BatchGroup`s that
+    // need to be recycled. The number of unused `BatchGroup`s is expected to be
+    // small. Pick a constant which is inferred from real programs.
+    constexpr uptr MaxUnusedSize = 8;
+    CompactPtrT Blocks[MaxUnusedSize];
+    u32 Idx = 0;
+    RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId);
+    // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s
+    // when we are manipulating the freelist of `BatchClassRegion`. Instead, we
+    // should just push it back to the freelist when we merge two `BatchGroup`s.
+    // This logic hasn't been implemented because we haven't supported releasing
+    // pages in `BatchClassRegion`.
+    DCHECK_NE(BatchClassRegion, Region);
+
     // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note
     // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are
     // sorted.
@@ -1386,8 +1428,7 @@
         break;
       }
 
-      DCHECK_NE(BG->CompactPtrGroupBase,
-                GroupsToRelease.front()->CompactPtrGroupBase);
+      DCHECK(!BG->Batches.empty());
 
       if (BG->CompactPtrGroupBase <
           GroupsToRelease.front()->CompactPtrGroupBase) {
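
The mergeGroupsToReleaseBack() preamble above sets up a small fixed buffer (MaxUnusedSize slots) so that BatchGroups and TransferBatches made redundant by the merge are recycled to the batch-class freelist in bulk rather than with one lock acquisition each: the buffer is flushed whenever the next group needs more slots than remain, and once more after the loop. Below is a self-contained sketch of that collect-then-flush pattern, with pushBatchClassBlocks(), BatchClassLock and recycle() as simplified stand-ins for the real batch-class freelist plumbing.

#include <cstdint>
#include <cstdio>
#include <mutex>

constexpr uint32_t MaxUnusedSize = 8;
std::mutex BatchClassLock;
uint32_t TotalRecycled = 0;

void pushBatchClassBlocks(const uint64_t *Blocks, uint32_t Count) {
  // In the patch this runs under BatchClassRegion->FLLock.
  std::lock_guard<std::mutex> L(BatchClassLock);
  TotalRecycled += Count;
  (void)Blocks;
}

void recycle(const uint64_t *Unused, uint32_t N) {
  uint64_t Blocks[MaxUnusedSize];
  uint32_t Idx = 0;
  for (uint32_t I = 0; I < N; ++I) {
    // Each merged group frees 1 slot (the BatchGroup) or 2 (plus an emptied
    // TransferBatch); assume 1 here for simplicity.
    const uint32_t NeededSlots = 1;
    if (Idx + NeededSlots > MaxUnusedSize) {
      pushBatchClassBlocks(Blocks, Idx); // flush when the buffer is full
      Idx = 0;
    }
    Blocks[Idx++] = Unused[I];
  }
  if (Idx != 0) // flush the remainder at the end
    pushBatchClassBlocks(Blocks, Idx);
}

int main() {
  uint64_t Unused[20];
  for (uint32_t I = 0; I < 20; ++I)
    Unused[I] = I;
  recycle(Unused, 20);
  std::printf("recycled %u blocks\n", TotalRecycled); // prints: recycled 20 blocks
  return 0;
}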
@@ -1396,6 +1437,65 @@
         continue;
       }
 
+      BatchGroup *Cur = GroupsToRelease.front();
+      TransferBatch *UnusedTransferBatch = nullptr;
+      GroupsToRelease.pop_front();
+
+      if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) {
+        BG->PushedBlocks += Cur->PushedBlocks;
+        // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while
+        // collecting the `GroupsToRelease`.
+        BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint;
+        const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch;
+
+        // Note that the first TransferBatches in both `Batches` may not be
+        // full and only the first TransferBatch can have non-full blocks. Thus
+        // we have to merge them before appending one to another.
+        if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) {
+          BG->Batches.append_back(&Cur->Batches);
+        } else {
+          TransferBatch *NonFullBatch = Cur->Batches.front();
+          Cur->Batches.pop_front();
+          const u16 NonFullBatchCount = NonFullBatch->getCount();
+          // The remaining Batches in `Cur` are full.
+          BG->Batches.append_back(&Cur->Batches);
+
+          if (BG->Batches.front()->getCount() == MaxCachedPerBatch) {
+            // Only 1 non-full TransferBatch, push it to the front and append
+            // the remaining.
+            BG->Batches.push_front(NonFullBatch);
+          } else {
+            const u16 NumBlocksToMove = static_cast<u16>(
+                Min(static_cast<u16>(MaxCachedPerBatch -
+                                     BG->Batches.front()->getCount()),
+                    NonFullBatchCount));
+            BG->Batches.front()->appendFromTransferBatch(NonFullBatch,
+                                                         NumBlocksToMove);
+            if (NonFullBatch->isEmpty())
+              UnusedTransferBatch = NonFullBatch;
+            else
+              BG->Batches.push_front(NonFullBatch);
+          }
+        }
+
+        const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U;
+        if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) {
+          ScopedLock L(BatchClassRegion->FLLock);
+          pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
+          Idx = 0;
+        }
+        Blocks[Idx++] =
+            compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(Cur));
+        if (UnusedTransferBatch) {
+          Blocks[Idx++] =
+              compactPtr(SizeClassMap::BatchClassId,
+                         reinterpret_cast<uptr>(UnusedTransferBatch));
+        }
+        Prev = BG;
+        BG = BG->Next;
+        continue;
+      }
+
       // At here, the `BG` is the first BatchGroup with CompactPtrGroupBase
       // larger than the first element in `GroupsToRelease`. We need to insert
       // `GroupsToRelease::front()` (which is `Cur` below) before `BG`.
@@ -1406,8 +1506,6 @@
       //
       // Afterwards, we don't need to advance `BG` because the order between
       // `BG` and the new `GroupsToRelease::front()` hasn't been checked.
-      BatchGroup *Cur = GroupsToRelease.front();
-      GroupsToRelease.pop_front();
       if (Prev == nullptr)
         Region->FreeListInfo.BlockList.push_front(Cur);
       else
@@ -1416,8 +1514,10 @@
       Prev = Cur;
     }
 
-    DCHECK_EQ(Region->FreeListInfo.BlockList.size(), NumberOfBatchGroups);
-    (void)NumberOfBatchGroups;
+    if (Idx != 0) {
+      ScopedLock L(BatchClassRegion->FLLock);
+      pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
+    }
 
     if (SCUDO_DEBUG) {
       BatchGroup *Prev = Region->FreeListInfo.BlockList.front();
diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -308,10 +308,18 @@
         if (P)
           V.push_back(std::make_pair(ClassId, P));
       }
+
+      // Try to interleave pushBlocks(), popBatch() and releaseToOS().
+      Allocator->releaseToOS(scudo::ReleaseToOS::Force);
+
       while (!V.empty()) {
         auto Pair = V.back();
         Cache.deallocate(Pair.first, Pair.second);
         V.pop_back();
+        // This increases the chance of having non-full TransferBatches and it
+        // will jump into the code path of merging TransferBatches.
+        if (std::rand() % 8 == 0)
+          Cache.drain();
       }
       Cache.destroy(nullptr);
     });
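
For reference, the loop built up in the @@ -1396 and @@ -1406 hunks of primary64.h above is a merge of two lists sorted by CompactPtrGroupBase: a group already present in the region freelist absorbs the incoming group's batches (and the freed headers are recycled), while anything else is spliced in front of the first group with a larger base. The following is a simplified, self-contained sketch of that sorted-merge control flow only; Group, mergeBack() and the integer keys are illustrative stand-ins for BatchGroup and its compact group bases.

#include <cstdint>
#include <cstdio>

struct Group {
  uint64_t GroupBase;
  uint32_t Blocks;
  Group *Next;
};

// Merges `ToRelease` (sorted) back into `Head` (sorted), combining groups
// that share the same GroupBase. Returns the new head of the list.
Group *mergeBack(Group *Head, Group *ToRelease) {
  Group *Prev = nullptr;
  Group *BG = Head;
  while (ToRelease != nullptr) {
    Group *Cur = ToRelease;
    if (BG != nullptr && BG->GroupBase < Cur->GroupBase) {
      Prev = BG;
      BG = BG->Next; // keep scanning the resident list
      continue;
    }
    ToRelease = ToRelease->Next;
    if (BG != nullptr && BG->GroupBase == Cur->GroupBase) {
      BG->Blocks += Cur->Blocks; // same group: merge in place and retire `Cur`
      // (the patch would recycle `Cur` to the batch-class freelist here)
      continue;
    }
    // Splice `Cur` in before the first group with a larger base (or at the end).
    Cur->Next = BG;
    if (Prev == nullptr)
      Head = Cur;
    else
      Prev->Next = Cur;
    Prev = Cur;
  }
  return Head;
}

int main() {
  Group B{30, 1, nullptr};
  Group A{10, 1, &B};      // resident list: 10 -> 30
  Group Y{30, 3, nullptr};
  Group X{20, 2, &Y};      // groups to merge back: 20 -> 30
  for (Group *G = mergeBack(&A, &X); G != nullptr; G = G->Next)
    std::printf("base=%llu blocks=%u\n",
                static_cast<unsigned long long>(G->GroupBase), G->Blocks);
  // Expected output: base=10 blocks=1, base=20 blocks=2, base=30 blocks=4
  return 0;
}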