diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt --- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt @@ -30,6 +30,10 @@ list(APPEND SCUDO_CFLAGS -O3) endif() +if (CMAKE_CXX_COMPILER_ID MATCHES Clang) + list(APPEND SCUDO_CFLAGS -Werror=thread-safety) +endif() + set(SCUDO_LINK_FLAGS) list(APPEND SCUDO_LINK_FLAGS -Wl,-z,defs,-z,now,-z,relro) diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -245,9 +245,9 @@ // - unlinking the local stats from the global ones (destroying the cache does // the last two items). void commitBack(TSD *TSD) { - Quarantine.drain(&TSD->QuarantineCache, - QuarantineCallback(*this, TSD->Cache)); - TSD->Cache.destroy(&Stats); + Quarantine.drain(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache())); + TSD->getCache().destroy(&Stats); } ALWAYS_INLINE void *getHeaderTaggedPointer(void *Ptr) { @@ -301,7 +301,7 @@ NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, uptr Alignment = MinAlignment, - bool ZeroContents = false) { + bool ZeroContents = false) NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); const Options Options = Primary.Options.load(); @@ -371,23 +371,24 @@ DCHECK_NE(ClassId, 0U); bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - Block = TSD->Cache.allocate(ClassId); + Block = TSD->getCache().allocate(ClassId); // If the allocation failed, the most likely reason with a 32-bit primary // is the region being full. In that event, retry in each successively // larger class until it fits. If it fails to fit in the largest class, // fallback to the Secondary. if (UNLIKELY(!Block)) { while (ClassId < SizeClassMap::LargestClassId && !Block) - Block = TSD->Cache.allocate(++ClassId); + Block = TSD->getCache().allocate(++ClassId); if (!Block) ClassId = 0; } if (UnlockRequired) TSD->unlock(); } - if (UNLIKELY(ClassId == 0)) + if (UNLIKELY(ClassId == 0)) { Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd, FillContents); + } if (UNLIKELY(!Block)) { if (Options.get(OptionBit::MayReturnNull)) @@ -691,7 +692,7 @@ // TODO(kostyak): disable() is currently best-effort. There are some small // windows of time when an allocation could still succeed after // this function finishes. We will revisit that later. - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); #ifdef GWP_ASAN_HOOKS GuardedAlloc.disable(); @@ -703,7 +704,7 @@ Secondary.disable(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); Secondary.enable(); Primary.enable(); @@ -722,9 +723,7 @@ // sizing purposes. 
uptr getStats(char *Buffer, uptr Size) { ScopedString Str; - disable(); const uptr Length = getStats(&Str) + 1; - enable(); if (Length < Size) Size = Length; if (Buffer && Size) { @@ -736,9 +735,7 @@ void printStats() { ScopedString Str; - disable(); getStats(&Str); - enable(); Str.output(); } @@ -1101,7 +1098,8 @@ } void quarantineOrDeallocateChunk(Options Options, void *TaggedPtr, - Chunk::UnpackedHeader *Header, uptr Size) { + Chunk::UnpackedHeader *Header, + uptr Size) NO_THREAD_SAFETY_ANALYSIS { void *Ptr = getHeaderTaggedPointer(TaggedPtr); Chunk::UnpackedHeader NewHeader = *Header; // If the quarantine is disabled, the actual size of a chunk is 0 or larger @@ -1143,7 +1141,7 @@ if (LIKELY(ClassId)) { bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - TSD->Cache.deallocate(ClassId, BlockBegin); + TSD->getCache().deallocate(ClassId, BlockBegin); if (UnlockRequired) TSD->unlock(); } else { @@ -1155,8 +1153,8 @@ } else { bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - Quarantine.put(&TSD->QuarantineCache, - QuarantineCallback(*this, TSD->Cache), Ptr, Size); + Quarantine.put(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache()), Ptr, Size); if (UnlockRequired) TSD->unlock(); } diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -195,6 +195,8 @@ sync_mutex_unlock(&M); } +void HybridMutex::assertHeldImpl() __TA_NO_THREAD_SAFETY_ANALYSIS {} + u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } diff --git a/compiler-rt/lib/scudo/standalone/linux.cpp b/compiler-rt/lib/scudo/standalone/linux.cpp --- a/compiler-rt/lib/scudo/standalone/linux.cpp +++ b/compiler-rt/lib/scudo/standalone/linux.cpp @@ -11,6 +11,7 @@ #if SCUDO_LINUX #include "common.h" +#include "internal_defs.h" #include "linux.h" #include "mutex.h" #include "string_utils.h" @@ -128,6 +129,10 @@ } } +void HybridMutex::assertHeldImpl() { + CHECK(atomic_load(&M, memory_order_acquire) != Unlocked); +} + u64 getMonotonicTime() { timespec TS; clock_gettime(CLOCK_MONOTONIC, &TS); diff --git a/compiler-rt/lib/scudo/standalone/mutex.h b/compiler-rt/lib/scudo/standalone/mutex.h --- a/compiler-rt/lib/scudo/standalone/mutex.h +++ b/compiler-rt/lib/scudo/standalone/mutex.h @@ -11,6 +11,7 @@ #include "atomic_helpers.h" #include "common.h" +#include "thread_annotations.h" #include @@ -20,10 +21,10 @@ namespace scudo { -class HybridMutex { +class CAPABILITY("mutex") HybridMutex { public: - bool tryLock(); - NOINLINE void lock() { + bool tryLock() TRY_ACQUIRE(true); + NOINLINE void lock() ACQUIRE() { if (LIKELY(tryLock())) return; // The compiler may try to fully unroll the loop, ending up in a @@ -40,9 +41,20 @@ } lockSlow(); } - void unlock(); + void unlock() RELEASE(); + + // TODO(chiahungduan): In general, we may want to assert the owner of lock as + // well. Given the current uses of HybridMutex, it's acceptable without + // asserting the owner. Re-evaluate this when we have certain scenarios which + // requires a more fine-grained lock granularity. 
+ ALWAYS_INLINE void assertHeld() ASSERT_CAPABILITY(this) { + if (SCUDO_DEBUG) + assertHeldImpl(); + } private: + void assertHeldImpl(); + static constexpr u8 NumberOfTries = 8U; static constexpr u8 NumberOfYields = 8U; @@ -52,13 +64,13 @@ sync_mutex_t M = {}; #endif - void lockSlow(); + void lockSlow() ACQUIRE(); }; -class ScopedLock { +class SCOPED_CAPABILITY ScopedLock { public: - explicit ScopedLock(HybridMutex &M) : Mutex(M) { Mutex.lock(); } - ~ScopedLock() { Mutex.unlock(); } + explicit ScopedLock(HybridMutex &M) ACQUIRE(M) : Mutex(M) { Mutex.lock(); } + ~ScopedLock() RELEASE() { Mutex.unlock(); } private: HybridMutex &Mutex; diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -18,6 +18,7 @@ #include "report.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -62,7 +63,7 @@ static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { if (SCUDO_FUCHSIA) reportError("SizeClassAllocator32 is not supported on Fuchsia"); @@ -86,7 +87,7 @@ setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } - void unmapTestOnly() { + void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS { while (NumberOfStashedRegions > 0) unmap(reinterpret_cast(RegionsStash[--NumberOfStashedRegions]), RegionSize); @@ -121,11 +122,11 @@ DCHECK_LT(ClassId, NumClasses); SizeClassInfo *Sci = getSizeClassInfo(ClassId); ScopedLock L(Sci->Mutex); - TransferBatch *B = popBatchImpl(C, ClassId); + TransferBatch *B = popBatchImpl(C, ClassId, Sci); if (UNLIKELY(!B)) { if (UNLIKELY(!populateFreeList(C, ClassId, Sci))) return nullptr; - B = popBatchImpl(C, ClassId); + B = popBatchImpl(C, ClassId, Sci); // if `populateFreeList` succeeded, we are supposed to get free blocks. DCHECK_NE(B, nullptr); } @@ -149,7 +150,7 @@ // the blocks. if (Size == 1 && !populateFreeList(C, ClassId, Sci)) return; - pushBlocksImpl(C, ClassId, Array, Size); + pushBlocksImpl(C, ClassId, Sci, Array, Size); Sci->Stats.PushedBlocks += Size; return; } @@ -173,14 +174,14 @@ } ScopedLock L(Sci->Mutex); - pushBlocksImpl(C, ClassId, Array, Size, SameGroup); + pushBlocksImpl(C, ClassId, Sci, Array, Size, SameGroup); Sci->Stats.PushedBlocks += Size; if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Sci, ClassId); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // The BatchClassId must be locked last since other classes can use it. for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { if (static_cast(I) == SizeClassMap::BatchClassId) @@ -192,7 +193,7 @@ PossibleRegions.disable(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { PossibleRegions.enable(); RegionsStashMutex.unlock(); getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); @@ -207,6 +208,10 @@ uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + // TODO: The call of `iterateOverBlocks` requires to disabling the + // SizeClassAllocator32. We may consider to locking each region on demand + // only. 
+ Sci->Mutex.assertHeld(); if (Sci->MinRegionIndex < MinRegionIndex) MinRegionIndex = Sci->MinRegionIndex; if (Sci->MaxRegionIndex > MaxRegionIndex) @@ -230,6 +235,7 @@ uptr PushedBlocks = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); TotalMapped += Sci->AllocatedUser; PoppedBlocks += Sci->Stats.PoppedBlocks; PushedBlocks += Sci->Stats.PushedBlocks; @@ -237,8 +243,11 @@ Str->append("Stats: SizeClassAllocator32: %zuM mapped in %zu allocations; " "remains %zu\n", TotalMapped >> 20, PoppedBlocks, PoppedBlocks - PushedBlocks); - for (uptr I = 0; I < NumClasses; I++) - getStats(Str, I, 0); + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + getStats(Str, I, Sci, 0); + } } bool setOption(Option O, sptr Value) { @@ -297,17 +306,17 @@ struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { HybridMutex Mutex; - SinglyLinkedList FreeList; - uptr CurrentRegion; - uptr CurrentRegionAllocated; - SizeClassStats Stats; + SinglyLinkedList FreeList GUARDED_BY(Mutex); + uptr CurrentRegion GUARDED_BY(Mutex); + uptr CurrentRegionAllocated GUARDED_BY(Mutex); + SizeClassStats Stats GUARDED_BY(Mutex); u32 RandState; - uptr AllocatedUser; + uptr AllocatedUser GUARDED_BY(Mutex); // Lowest & highest region index allocated for this size class, to avoid // looping through the whole NumRegions. - uptr MinRegionIndex; - uptr MaxRegionIndex; - ReleaseToOsInfo ReleaseInfo; + uptr MinRegionIndex GUARDED_BY(Mutex); + uptr MaxRegionIndex GUARDED_BY(Mutex); + ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex); }; static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); @@ -342,7 +351,7 @@ return Region; } - uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) { + uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) REQUIRES(Sci->Mutex) { DCHECK_LT(ClassId, NumClasses); uptr Region = 0; { @@ -395,10 +404,10 @@ // `SameGroup=true` instead. // // The region mutex needs to be held while calling this method. - void pushBlocksImpl(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size, - bool SameGroup = false) { + void pushBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci, + CompactPtrT *Array, u32 Size, bool SameGroup = false) + REQUIRES(Sci->Mutex) { DCHECK_GT(Size, 0U); - SizeClassInfo *Sci = getSizeClassInfo(ClassId); auto CreateGroup = [&](uptr GroupId) { BatchGroup *BG = nullptr; @@ -524,8 +533,8 @@ // group id will be considered first. // // The region mutex needs to be held while calling this method. - TransferBatch *popBatchImpl(CacheT *C, uptr ClassId) { - SizeClassInfo *Sci = getSizeClassInfo(ClassId); + TransferBatch *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { if (Sci->FreeList.empty()) return nullptr; @@ -553,7 +562,8 @@ return B; } - NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) { + NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { uptr Region; uptr Offset; // If the size-class currently has a region associated to it, use it. The @@ -608,7 +618,7 @@ // it only happens when it crosses the group size boundary. Instead of // sorting them, treat them as same group here to avoid sorting the // almost-sorted blocks. 
- pushBlocksImpl(C, ClassId, &ShuffleArray[I], N, /*SameGroup=*/true); + pushBlocksImpl(C, ClassId, Sci, &ShuffleArray[I], N, /*SameGroup=*/true); I += N; } @@ -629,8 +639,8 @@ return true; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { - SizeClassInfo *Sci = getSizeClassInfo(ClassId); + void getStats(ScopedString *Str, uptr ClassId, SizeClassInfo *Sci, uptr Rss) + REQUIRES(Sci->Mutex) { if (Sci->AllocatedUser == 0) return; const uptr InUse = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks; @@ -643,7 +653,7 @@ } NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, - bool Force = false) { + bool Force = false) REQUIRES(Sci->Mutex) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); @@ -749,14 +759,15 @@ SizeClassInfo SizeClassInfoArray[NumClasses] = {}; // Track the regions in use, 0 is unused, otherwise store ClassId + 1. + // FIXME: There is no dedicated lock for `PossibleRegions`. ByteMap PossibleRegions = {}; atomic_s32 ReleaseToOsIntervalMs = {}; // Unless several threads request regions simultaneously from different size // classes, the stash rarely contains more than 1 entry. static constexpr uptr MaxStashedRegions = 4; HybridMutex RegionsStashMutex; - uptr NumberOfStashedRegions = 0; - uptr RegionsStash[MaxStashedRegions] = {}; + uptr NumberOfStashedRegions GUARDED_BY(RegionsStashMutex) = 0; + uptr RegionsStash[MaxStashedRegions] GUARDED_BY(RegionsStashMutex) = {}; }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -18,6 +18,7 @@ #include "release.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -60,7 +61,7 @@ static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); DCHECK_EQ(PrimaryBase, 0U); // Reserve the space required for the Primary. @@ -86,7 +87,7 @@ setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } - void unmapTestOnly() { + void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS { for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); *Region = {}; @@ -101,11 +102,11 @@ DCHECK_LT(ClassId, NumClasses); RegionInfo *Region = getRegionInfo(ClassId); ScopedLock L(Region->Mutex); - TransferBatch *B = popBatchImpl(C, ClassId); + TransferBatch *B = popBatchImpl(C, ClassId, Region); if (UNLIKELY(!B)) { if (UNLIKELY(!populateFreeList(C, ClassId, Region))) return nullptr; - B = popBatchImpl(C, ClassId); + B = popBatchImpl(C, ClassId, Region); // if `populateFreeList` succeeded, we are supposed to get free blocks. DCHECK_NE(B, nullptr); } @@ -129,7 +130,7 @@ // the blocks. 
if (Size == 1 && UNLIKELY(!populateFreeList(C, ClassId, Region))) return; - pushBlocksImpl(C, ClassId, Array, Size); + pushBlocksImpl(C, ClassId, Region, Array, Size); Region->Stats.PushedBlocks += Size; return; } @@ -153,14 +154,14 @@ } ScopedLock L(Region->Mutex); - pushBlocksImpl(C, ClassId, Array, Size, SameGroup); + pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup); Region->Stats.PushedBlocks += Size; if (ClassId != SizeClassMap::BatchClassId) releaseToOSMaybe(Region, ClassId); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // The BatchClassId must be locked last since other classes can use it. for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { if (static_cast(I) == SizeClassMap::BatchClassId) @@ -170,7 +171,7 @@ getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) @@ -183,7 +184,11 @@ for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; - const RegionInfo *Region = getRegionInfo(I); + RegionInfo *Region = getRegionInfo(I); + // TODO: The call of `iterateOverBlocks` requires to disabling the + // SizeClassAllocator64. We may consider to locking each region on demand + // only. + Region->Mutex.assertHeld(); const uptr BlockSize = getSizeByClassId(I); const uptr From = Region->RegionBeg; const uptr To = From + Region->AllocatedUser; @@ -199,6 +204,7 @@ uptr PushedBlocks = 0; for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->Mutex); if (Region->MappedUser) TotalMapped += Region->MappedUser; PoppedBlocks += Region->Stats.PoppedBlocks; @@ -209,8 +215,11 @@ TotalMapped >> 20, 0U, PoppedBlocks, PoppedBlocks - PushedBlocks); - for (uptr I = 0; I < NumClasses; I++) - getStats(Str, I, 0); + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->Mutex); + getStats(Str, I, Region, 0); + } } bool setOption(Option O, sptr Value) { @@ -261,15 +270,23 @@ decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr)); } - static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + static BlockInfo findNearestBlock(const char *RegionInfoData, + uptr Ptr) NO_THREAD_SAFETY_ANALYSIS { const RegionInfo *RegionInfoArray = reinterpret_cast(RegionInfoData); + uptr ClassId; uptr MinDistance = -1UL; for (uptr I = 0; I != NumClasses; ++I) { if (I == SizeClassMap::BatchClassId) continue; uptr Begin = RegionInfoArray[I].RegionBeg; + // TODO(chiahungduan): In fact, We need to lock the RegionInfo::Mutex. + // However, the RegionInfoData is passed with const qualifier and lock the + // mutex requires modifying RegionInfoData, which means we need to remove + // the const qualifier. This may lead to another undefined behavior (The + // first one is accessing `AllocatedUser` without locking. It's better to + // pass `RegionInfoData` as `void *` then we can lock the mutex properly. uptr End = Begin + RegionInfoArray[I].AllocatedUser; if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) continue; @@ -330,15 +347,17 @@ struct UnpaddedRegionInfo { HybridMutex Mutex; - SinglyLinkedList FreeList; + SinglyLinkedList FreeList GUARDED_BY(Mutex); + // This is initialized before thread creation. uptr RegionBeg = 0; - RegionStats Stats = {}; - u32 RandState = 0; - uptr MappedUser = 0; // Bytes mapped for user memory. 
- uptr AllocatedUser = 0; // Bytes allocated for user memory. - MapPlatformData Data = {}; - ReleaseToOsInfo ReleaseInfo = {}; - bool Exhausted = false; + RegionStats Stats GUARDED_BY(Mutex) = {}; + u32 RandState GUARDED_BY(Mutex) = 0; + uptr MappedUser GUARDED_BY(Mutex) = 0; // Bytes mapped for user memory. + uptr + AllocatedUser GUARDED_BY(Mutex) = 0; // Bytes allocated for user memory. + MapPlatformData Data GUARDED_BY(Mutex) = {}; + ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex) = {}; + bool Exhausted GUARDED_BY(Mutex) = false; }; struct RegionInfo : UnpaddedRegionInfo { char Padding[SCUDO_CACHE_LINE_SIZE - @@ -401,10 +420,10 @@ // `SameGroup=true` instead. // // The region mutex needs to be held while calling this method. - void pushBlocksImpl(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size, - bool SameGroup = false) { + void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, + CompactPtrT *Array, u32 Size, bool SameGroup = false) + REQUIRES(Region->Mutex) { DCHECK_GT(Size, 0U); - RegionInfo *Region = getRegionInfo(ClassId); auto CreateGroup = [&](uptr GroupId) { BatchGroup *BG = nullptr; @@ -530,8 +549,8 @@ // group id will be considered first. // // The region mutex needs to be held while calling this method. - TransferBatch *popBatchImpl(CacheT *C, uptr ClassId) { - RegionInfo *Region = getRegionInfo(ClassId); + TransferBatch *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->Mutex) { if (Region->FreeList.empty()) return nullptr; @@ -560,7 +579,8 @@ return B; } - NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region) { + NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->Mutex) { const uptr Size = getSizeByClassId(ClassId); const u16 MaxCount = TransferBatch::getMaxCached(Size); @@ -577,7 +597,11 @@ if (!Region->Exhausted) { Region->Exhausted = true; ScopedString Str; - getStats(&Str); + // FIXME: getStats() needs to go over all the regions and will take + // the locks of them. Which means we will try to recursively acquire + // the `Region->Mutex` which is not supported. It will be better to + // log this somewhere else. + // getStats(&Str); Str.append( "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", RegionSize >> 20, Size); @@ -623,7 +647,8 @@ // it only happens when it crosses the group size boundary. Instead of // sorting them, treat them as same group here to avoid sorting the // almost-sorted blocks. 
- pushBlocksImpl(C, ClassId, &ShuffleArray[I], N, /*SameGroup=*/true); + pushBlocksImpl(C, ClassId, Region, &ShuffleArray[I], N, + /*SameGroup=*/true); I += N; } @@ -634,8 +659,8 @@ return true; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { - RegionInfo *Region = getRegionInfo(ClassId); + void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region, uptr Rss) + REQUIRES(Region->Mutex) { if (Region->MappedUser == 0) return; const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks; @@ -652,7 +677,7 @@ } NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, - bool Force = false) { + bool Force = false) REQUIRES(Region->Mutex) { const uptr BlockSize = getSizeByClassId(ClassId); const uptr PageSize = getPageSizeCached(); diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h --- a/compiler-rt/lib/scudo/standalone/quarantine.h +++ b/compiler-rt/lib/scudo/standalone/quarantine.h @@ -12,6 +12,7 @@ #include "list.h" #include "mutex.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -172,7 +173,7 @@ typedef QuarantineCache CacheT; using ThisT = GlobalQuarantine; - void init(uptr Size, uptr CacheSize) { + void init(uptr Size, uptr CacheSize) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); DCHECK_EQ(atomic_load_relaxed(&MaxSize), 0U); DCHECK_EQ(atomic_load_relaxed(&MinSize), 0U); @@ -197,16 +198,19 @@ drain(C, Cb); } - void NOINLINE drain(CacheT *C, Callback Cb) { + void NOINLINE drain(CacheT *C, Callback Cb) EXCLUDES(CacheMutex) { + bool needRecycle = false; { ScopedLock L(CacheMutex); Cache.transfer(C); + needRecycle = Cache.getSize() > getMaxSize(); } - if (Cache.getSize() > getMaxSize() && RecycleMutex.tryLock()) + + if (needRecycle && RecycleMutex.tryLock()) recycle(atomic_load_relaxed(&MinSize), Cb); } - void NOINLINE drainAndRecycle(CacheT *C, Callback Cb) { + void NOINLINE drainAndRecycle(CacheT *C, Callback Cb) EXCLUDES(CacheMutex) { { ScopedLock L(CacheMutex); Cache.transfer(C); @@ -215,20 +219,21 @@ recycle(0, Cb); } - void getStats(ScopedString *Str) const { + void getStats(ScopedString *Str) EXCLUDES(CacheMutex) { + ScopedLock L(CacheMutex); // It assumes that the world is stopped, just as the allocator's printStats. Cache.getStats(Str); Str->append("Quarantine limits: global: %zuK; thread local: %zuK\n", getMaxSize() >> 10, getCacheSize() >> 10); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // RecycleMutex must be locked 1st since we grab CacheMutex within recycle. RecycleMutex.lock(); CacheMutex.lock(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { CacheMutex.unlock(); RecycleMutex.unlock(); } @@ -236,13 +241,14 @@ private: // Read-only data. 
alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex; - CacheT Cache; + CacheT Cache GUARDED_BY(CacheMutex); alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex; atomic_uptr MinSize = {}; atomic_uptr MaxSize = {}; alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize = {}; - void NOINLINE recycle(uptr MinSize, Callback Cb) { + void NOINLINE recycle(uptr MinSize, Callback Cb) RELEASE(RecycleMutex) + EXCLUDES(CacheMutex) { CacheT Tmp; Tmp.init(); { diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h --- a/compiler-rt/lib/scudo/standalone/release.h +++ b/compiler-rt/lib/scudo/standalone/release.h @@ -12,6 +12,7 @@ #include "common.h" #include "list.h" #include "mutex.h" +#include "thread_annotations.h" namespace scudo { @@ -76,7 +77,12 @@ Buffer = nullptr; } - void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) { + // Lock of `StaticBuffer` is acquired conditionally and there's no easy way to + // specify the thread-safety attribute properly in current code structure. + // Besides, it's the only place we may want to check thread safety. Therefore, + // it's fine to bypass the thread-safety analysis now. + void reset(uptr NumberOfRegion, uptr CountersPerRegion, + uptr MaxValue) NO_THREAD_SAFETY_ANALYSIS { DCHECK_GT(NumberOfRegion, 0); DCHECK_GT(CountersPerRegion, 0); DCHECK_GT(MaxValue, 0); @@ -181,7 +187,7 @@ [[no_unique_address]] MapPlatformData MapData = {}; static HybridMutex Mutex; - static uptr StaticBuffer[StaticBufferCount]; + static uptr StaticBuffer[StaticBufferCount] GUARDED_BY(Mutex); }; template class FreePagesRangeTracker { diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -17,6 +17,7 @@ #include "options.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -133,7 +134,7 @@ Config::SecondaryCacheEntriesArraySize, ""); - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(EntriesCount, 0U); setOption(Option::MaxCacheEntriesCount, static_cast(Config::SecondaryCacheDefaultMaxEntriesCount)); @@ -142,7 +143,7 @@ setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } - void store(Options Options, LargeBlock::Header *H) { + void store(Options Options, LargeBlock::Header *H) EXCLUDES(Mutex) { if (!canCache(H->CommitSize)) return unmap(H); @@ -227,7 +228,7 @@ } bool retrieve(Options Options, uptr Size, uptr Alignment, - LargeBlock::Header **H, bool *Zeroed) { + LargeBlock::Header **H, bool *Zeroed) EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); bool Found = false; @@ -249,8 +250,9 @@ if (HeaderPos > CommitBase + CommitSize) continue; if (HeaderPos < CommitBase || - AllocPos > CommitBase + PageSize * MaxUnusedCachePages) + AllocPos > CommitBase + PageSize * MaxUnusedCachePages) { continue; + } Found = true; Entry = Entries[I]; Entries[I].CommitBase = 0; @@ -279,6 +281,8 @@ (*H)->MapBase = Entry.MapBase; (*H)->MapSize = Entry.MapSize; (*H)->Data = Entry.Data; + + ScopedLock L(Mutex); EntriesCount--; } return Found; @@ -315,7 +319,7 @@ void releaseToOS() { releaseOlderThan(UINT64_MAX); } - void disableMemoryTagging() { + void disableMemoryTagging() EXCLUDES(Mutex) { ScopedLock L(Mutex); for (u32 I = 0; I != Config::SecondaryCacheQuarantineSize; ++I) { if 
(Quarantine[I].CommitBase) { @@ -332,9 +336,9 @@ QuarantinePos = -1U; } - void disable() { Mutex.lock(); } + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); } - void enable() { Mutex.unlock(); } + void enable() NO_THREAD_SAFETY_ANALYSIS { Mutex.unlock(); } void unmapTestOnly() { empty(); } @@ -375,7 +379,7 @@ u64 Time; }; - void releaseIfOlderThan(CachedBlock &Entry, u64 Time) { + void releaseIfOlderThan(CachedBlock &Entry, u64 Time) REQUIRES(Mutex) { if (!Entry.CommitBase || !Entry.Time) return; if (Entry.Time > Time) { @@ -387,7 +391,7 @@ Entry.Time = 0; } - void releaseOlderThan(u64 Time) { + void releaseOlderThan(u64 Time) EXCLUDES(Mutex) { ScopedLock L(Mutex); if (!EntriesCount || OldestTime == 0 || OldestTime > Time) return; @@ -399,22 +403,24 @@ } HybridMutex Mutex; - u32 EntriesCount = 0; - u32 QuarantinePos = 0; + u32 EntriesCount GUARDED_BY(Mutex) = 0; + u32 QuarantinePos GUARDED_BY(Mutex) = 0; atomic_u32 MaxEntriesCount = {}; atomic_uptr MaxEntrySize = {}; - u64 OldestTime = 0; - u32 IsFullEvents = 0; + u64 OldestTime GUARDED_BY(Mutex) = 0; + u32 IsFullEvents GUARDED_BY(Mutex) = 0; atomic_s32 ReleaseToOsIntervalMs = {}; - CachedBlock Entries[Config::SecondaryCacheEntriesArraySize] = {}; + CachedBlock + Entries[Config::SecondaryCacheEntriesArraySize] GUARDED_BY(Mutex) = {}; NonZeroLengthArray - Quarantine = {}; + Quarantine GUARDED_BY(Mutex) = {}; }; template class MapAllocator { public: - void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) { + void init(GlobalStats *S, + s32 ReleaseToOsInterval = -1) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(AllocatedBytes, 0U); DCHECK_EQ(FreedBytes, 0U); Cache.init(ReleaseToOsInterval); @@ -438,19 +444,21 @@ return getBlockEnd(Ptr) - reinterpret_cast(Ptr); } - void getStats(ScopedString *Str) const; + void getStats(ScopedString *Str); - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); Cache.disable(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { Cache.enable(); Mutex.unlock(); } template void iterateOverBlocks(F Callback) const { + Mutex.assertHeld(); + for (const auto &H : InUseBlocks) { uptr Ptr = reinterpret_cast(&H) + LargeBlock::getHeaderSize(); if (allocatorSupportsMemoryTagging()) @@ -472,14 +480,14 @@ private: typename Config::SecondaryCache Cache; - HybridMutex Mutex; - DoublyLinkedList InUseBlocks; - uptr AllocatedBytes = 0; - uptr FreedBytes = 0; - uptr LargestSize = 0; - u32 NumberOfAllocs = 0; - u32 NumberOfFrees = 0; - LocalStats Stats; + mutable HybridMutex Mutex; + DoublyLinkedList InUseBlocks GUARDED_BY(Mutex); + uptr AllocatedBytes GUARDED_BY(Mutex) = 0; + uptr FreedBytes GUARDED_BY(Mutex) = 0; + uptr LargestSize GUARDED_BY(Mutex) = 0; + u32 NumberOfAllocs GUARDED_BY(Mutex) = 0; + u32 NumberOfFrees GUARDED_BY(Mutex) = 0; + LocalStats Stats GUARDED_BY(Mutex); }; // As with the Primary, the size passed to this function includes any desired @@ -600,7 +608,8 @@ } template -void MapAllocator::deallocate(Options Options, void *Ptr) { +void MapAllocator::deallocate(Options Options, void *Ptr) + EXCLUDES(Mutex) { LargeBlock::Header *H = LargeBlock::getHeader(Ptr); const uptr CommitSize = H->CommitSize; { @@ -615,7 +624,8 @@ } template -void MapAllocator::getStats(ScopedString *Str) const { +void MapAllocator::getStats(ScopedString *Str) EXCLUDES(Mutex) { + ScopedLock L(Mutex); Str->append("Stats: MapAllocator: allocated %u times (%zuK), freed %u times " "(%zuK), remains %u (%zuK) max %zuM\n", NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees, diff --git 
a/compiler-rt/lib/scudo/standalone/stats.h b/compiler-rt/lib/scudo/standalone/stats.h --- a/compiler-rt/lib/scudo/standalone/stats.h +++ b/compiler-rt/lib/scudo/standalone/stats.h @@ -12,6 +12,7 @@ #include "atomic_helpers.h" #include "list.h" #include "mutex.h" +#include "thread_annotations.h" #include @@ -60,19 +61,19 @@ public: void init() { LocalStats::init(); } - void link(LocalStats *S) { + void link(LocalStats *S) EXCLUDES(Mutex) { ScopedLock L(Mutex); StatsList.push_back(S); } - void unlink(LocalStats *S) { + void unlink(LocalStats *S) EXCLUDES(Mutex) { ScopedLock L(Mutex); StatsList.remove(S); for (uptr I = 0; I < StatCount; I++) add(static_cast(I), S->get(static_cast(I))); } - void get(uptr *S) const { + void get(uptr *S) const EXCLUDES(Mutex) { ScopedLock L(Mutex); for (uptr I = 0; I < StatCount; I++) S[I] = LocalStats::get(static_cast(I)); @@ -85,15 +86,15 @@ S[I] = static_cast(S[I]) >= 0 ? S[I] : 0; } - void lock() { Mutex.lock(); } - void unlock() { Mutex.unlock(); } + void lock() ACQUIRE(Mutex) { Mutex.lock(); } + void unlock() RELEASE(Mutex) { Mutex.unlock(); } - void disable() { lock(); } - void enable() { unlock(); } + void disable() ACQUIRE(Mutex) { lock(); } + void enable() RELEASE(Mutex) { unlock(); } private: mutable HybridMutex Mutex; - DoublyLinkedList StatsList; + DoublyLinkedList StatsList GUARDED_BY(Mutex); }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt --- a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt @@ -31,6 +31,11 @@ -mno-omit-leaf-frame-pointer) endif() +if (COMPILER_RT_TEST_COMPILER_ID MATCHES Clang) + list(APPEND SCUDO_UNITTEST_CFLAGS -Werror=thread-safety) +endif() + + set(SCUDO_TEST_ARCH ${SCUDO_STANDALONE_SUPPORTED_ARCH}) # gtests requires c++ diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -435,7 +435,7 @@ EXPECT_NE(Stats.find("Stats: Quarantine"), std::string::npos); } -SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) { +SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS { auto *Allocator = this->Allocator.get(); std::vector V; @@ -447,9 +447,9 @@ bool UnlockRequired; auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); - EXPECT_TRUE(!TSD->Cache.isEmpty()); - TSD->Cache.drain(); - EXPECT_TRUE(TSD->Cache.isEmpty()); + EXPECT_TRUE(!TSD->getCache().isEmpty()); + TSD->getCache().drain(); + EXPECT_TRUE(TSD->getCache().isEmpty()); if (UnlockRequired) TSD->unlock(); } @@ -724,7 +724,7 @@ bool UnlockRequired; auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); - TSD->Cache.drain(); + TSD->getCache().drain(); Allocator->releaseToOS(); } diff --git a/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp b/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp --- a/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp @@ -99,3 +99,10 @@ for (scudo::u32 I = 0; I < NumberOfThreads; I++) pthread_join(Threads[I], 0); } + +TEST(ScudoMutexTest, MutexAssertHeld) { + scudo::HybridMutex M; + M.lock(); + M.assertHeld(); + M.unlock(); +} diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp --- 
a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp @@ -82,7 +82,7 @@ EXPECT_FALSE(Allocator->isInitialized()); auto Registry = Allocator->getTSDRegistry(); - Registry->init(Allocator.get()); + Registry->initOnceMaybe(Allocator.get()); EXPECT_TRUE(Allocator->isInitialized()); } @@ -102,15 +102,15 @@ bool UnlockRequired; auto TSD = Registry->getTSDAndLock(&UnlockRequired); EXPECT_NE(TSD, nullptr); - EXPECT_EQ(TSD->Cache.Canary, 0U); + EXPECT_EQ(TSD->getCache().Canary, 0U); if (UnlockRequired) TSD->unlock(); Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/false); TSD = Registry->getTSDAndLock(&UnlockRequired); EXPECT_NE(TSD, nullptr); - EXPECT_EQ(TSD->Cache.Canary, 0U); - memset(&TSD->Cache, 0x42, sizeof(TSD->Cache)); + EXPECT_EQ(TSD->getCache().Canary, 0U); + memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache())); if (UnlockRequired) TSD->unlock(); } @@ -141,14 +141,14 @@ // For an exclusive TSD, the cache should be empty. We cannot guarantee the // same for a shared TSD. if (!UnlockRequired) - EXPECT_EQ(TSD->Cache.Canary, 0U); + EXPECT_EQ(TSD->getCache().Canary, 0U); // Transform the thread id to a uptr to use it as canary. const scudo::uptr Canary = static_cast( std::hash{}(std::this_thread::get_id())); - TSD->Cache.Canary = Canary; + TSD->getCache().Canary = Canary; // Loop a few times to make sure that a concurrent thread isn't modifying it. for (scudo::uptr I = 0; I < 4096U; I++) - EXPECT_EQ(TSD->Cache.Canary, Canary); + EXPECT_EQ(TSD->getCache().Canary, Canary); if (UnlockRequired) TSD->unlock(); } diff --git a/compiler-rt/lib/scudo/standalone/thread_annotations.h b/compiler-rt/lib/scudo/standalone/thread_annotations.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/thread_annotations.h @@ -0,0 +1,70 @@ +//===-- thread_annotations.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_THREAD_ANNOTATIONS_ +#define SCUDO_THREAD_ANNOTATIONS_ + +// Enable thread safety attributes only with clang. +// The attributes can be safely ignored when compiling with other compilers. +#if defined(__clang__) +#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) +#else +#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op +#endif + +#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) + +#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) + +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) + +#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) + +#define ACQUIRED_BEFORE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) + +#define ACQUIRED_AFTER(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) + +#define REQUIRES(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) + +#define REQUIRES_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) + +#define ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) + +#define ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) + +#define RELEASE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) + +#define RELEASE_SHARED(...) 
\ + THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) + +#define TRY_ACQUIRE_SHARED(...) \ + THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) + +#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) + +#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) + +#define ASSERT_SHARED_CAPABILITY(x) \ + THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) + +#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) + +#define NO_THREAD_SAFETY_ANALYSIS \ + THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) + +#endif // SCUDO_THREAD_ANNOTATIONS_ diff --git a/compiler-rt/lib/scudo/standalone/trusty.cpp b/compiler-rt/lib/scudo/standalone/trusty.cpp --- a/compiler-rt/lib/scudo/standalone/trusty.cpp +++ b/compiler-rt/lib/scudo/standalone/trusty.cpp @@ -76,6 +76,8 @@ void HybridMutex::unlock() {} +void HybridMutex::assertHeldImpl() {} + u64 getMonotonicTime() { timespec TS; clock_gettime(CLOCK_MONOTONIC, &TS); diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h --- a/compiler-rt/lib/scudo/standalone/tsd.h +++ b/compiler-rt/lib/scudo/standalone/tsd.h @@ -12,6 +12,7 @@ #include "atomic_helpers.h" #include "common.h" #include "mutex.h" +#include "thread_annotations.h" #include // for PTHREAD_DESTRUCTOR_ITERATIONS #include @@ -24,21 +25,17 @@ namespace scudo { template struct alignas(SCUDO_CACHE_LINE_SIZE) TSD { - typename Allocator::CacheT Cache; - typename Allocator::QuarantineCacheT QuarantineCache; using ThisT = TSD; u8 DestructorIterations = 0; - void init(Allocator *Instance) { + void init(Allocator *Instance) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(DestructorIterations, 0U); DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); Instance->initCache(&Cache); DestructorIterations = PTHREAD_DESTRUCTOR_ITERATIONS; } - void commitBack(Allocator *Instance) { Instance->commitBack(this); } - - inline bool tryLock() { + inline bool tryLock() NO_THREAD_SAFETY_ANALYSIS { if (Mutex.tryLock()) { atomic_store_relaxed(&Precedence, 0); return true; @@ -49,16 +46,36 @@ static_cast(getMonotonicTime() >> FIRST_32_SECOND_64(16, 0))); return false; } - inline void lock() { + inline void lock() NO_THREAD_SAFETY_ANALYSIS { atomic_store_relaxed(&Precedence, 0); Mutex.lock(); } - inline void unlock() { Mutex.unlock(); } + inline void unlock() NO_THREAD_SAFETY_ANALYSIS { Mutex.unlock(); } inline uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } + void commitBack(Allocator *Instance) ASSERT_CAPABILITY(Mutex) { + Instance->commitBack(this); + } + + // Ideally, we may want to assert that all the operations on + // Cache/QuarantineCache always have the `Mutex` acquired. However, the + // current architecture of accessing TSD is not easy to cooperate with the + // thread-safety analysis because of pointer aliasing. So now we just add the + // assertion on the getters of Cache/QuarantineCache. 
+ typename Allocator::CacheT &getCache() ASSERT_CAPABILITY(Mutex) { + return Cache; + } + typename Allocator::QuarantineCacheT &getQuarantineCache() + ASSERT_CAPABILITY(Mutex) { + return QuarantineCache; + } + private: HybridMutex Mutex; atomic_uptr Precedence = {}; + + typename Allocator::CacheT Cache GUARDED_BY(Mutex); + typename Allocator::QuarantineCacheT QuarantineCache GUARDED_BY(Mutex); }; } // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h --- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h +++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h @@ -25,7 +25,7 @@ template void teardownThread(void *Ptr); template struct TSDRegistryExT { - void init(Allocator *Instance) { + void init(Allocator *Instance) REQUIRES(Mutex) { DCHECK(!Initialized); Instance->init(); CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread), 0); @@ -33,14 +33,14 @@ Initialized = true; } - void initOnceMaybe(Allocator *Instance) { + void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) { ScopedLock L(Mutex); if (LIKELY(Initialized)) return; init(Instance); // Sets Initialized. } - void unmapTestOnly(Allocator *Instance) { + void unmapTestOnly(Allocator *Instance) EXCLUDES(Mutex) { DCHECK(Instance); if (reinterpret_cast(pthread_getspecific(PThreadKey))) { DCHECK_EQ(reinterpret_cast(pthread_getspecific(PThreadKey)), @@ -53,6 +53,7 @@ FallbackTSD.commitBack(Instance); FallbackTSD = {}; State = {}; + ScopedLock L(Mutex); Initialized = false; } @@ -62,7 +63,13 @@ initThread(Instance, MinimalInit); } - ALWAYS_INLINE TSD *getTSDAndLock(bool *UnlockRequired) { + // TODO(chiahungduan): Consider removing the argument `UnlockRequired` by + // embedding the logic into TSD or always locking the TSD. It will enable us + // to properly mark thread annotation here and adding proper runtime + // assertions in the member functions of TSD. For example, assert the lock is + // acquired before calling TSD::commitBack(). + ALWAYS_INLINE TSD * + getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS { if (LIKELY(State.InitState == ThreadState::Initialized && !atomic_load(&Disabled, memory_order_acquire))) { *UnlockRequired = false; @@ -75,13 +82,13 @@ // To disable the exclusive TSD registry, we effectively lock the fallback TSD // and force all threads to attempt to use it instead of their local one. 
- void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); FallbackTSD.lock(); atomic_store(&Disabled, 1U, memory_order_release); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { atomic_store(&Disabled, 0U, memory_order_release); FallbackTSD.unlock(); Mutex.unlock(); @@ -113,7 +120,7 @@ } pthread_key_t PThreadKey = {}; - bool Initialized = false; + bool Initialized GUARDED_BY(Mutex) = false; atomic_u8 Disabled = {}; TSD FallbackTSD; HybridMutex Mutex; @@ -128,7 +135,8 @@ template thread_local ThreadState TSDRegistryExT::State; -template void teardownThread(void *Ptr) { +template +void teardownThread(void *Ptr) NO_THREAD_SAFETY_ANALYSIS { typedef TSDRegistryExT TSDRegistryT; Allocator *Instance = reinterpret_cast(Ptr); // The glibc POSIX thread-local-storage deallocation routine calls user diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h --- a/compiler-rt/lib/scudo/standalone/tsd_shared.h +++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h @@ -24,7 +24,7 @@ template struct TSDRegistrySharedT { - void init(Allocator *Instance) { + void init(Allocator *Instance) REQUIRES(Mutex) { DCHECK(!Initialized); Instance->init(); for (u32 I = 0; I < TSDsArraySize; I++) @@ -35,19 +35,20 @@ Initialized = true; } - void initOnceMaybe(Allocator *Instance) { + void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) { ScopedLock L(Mutex); if (LIKELY(Initialized)) return; init(Instance); // Sets Initialized. } - void unmapTestOnly(Allocator *Instance) { + void unmapTestOnly(Allocator *Instance) EXCLUDES(Mutex) { for (u32 I = 0; I < TSDsArraySize; I++) { TSDs[I].commitBack(Instance); TSDs[I] = {}; } setCurrentTSD(nullptr); + ScopedLock L(Mutex); Initialized = false; } @@ -58,7 +59,10 @@ initThread(Instance); } - ALWAYS_INLINE TSD *getTSDAndLock(bool *UnlockRequired) { + // TSDs is an array of locks and which is not supported for marking + // thread-safety capability. + ALWAYS_INLINE TSD * + getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS { TSD *TSD = getCurrentTSD(); DCHECK(TSD); *UnlockRequired = true; @@ -75,16 +79,26 @@ return getTSDAndLockSlow(TSD); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); + disableAllTSDs(); + } + // TSDs is an array of locks which is not supported for marking thread-safety + // capability. + void disableAllTSDs() NO_THREAD_SAFETY_ANALYSIS { for (u32 I = 0; I < TSDsArraySize; I++) TSDs[I].lock(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { + enableAllTSDs(); + Mutex.unlock(); + } + // TSDs is an array of locks and which is not supported for marking + // thread-safety capability. + void enableAllTSDs() NO_THREAD_SAFETY_ANALYSIS { for (s32 I = static_cast(TSDsArraySize - 1); I >= 0; I--) TSDs[I].unlock(); - Mutex.unlock(); } bool setOption(Option O, sptr Value) { @@ -119,7 +133,7 @@ return reinterpret_cast *>(*getTlsPtr() & ~1ULL); } - bool setNumberOfTSDs(u32 N) { + bool setNumberOfTSDs(u32 N) EXCLUDES(MutexTSDs) { ScopedLock L(MutexTSDs); if (N < NumberOfTSDs) return false; @@ -150,7 +164,7 @@ *getTlsPtr() |= B; } - NOINLINE void initThread(Allocator *Instance) { + NOINLINE void initThread(Allocator *Instance) NO_THREAD_SAFETY_ANALYSIS { initOnceMaybe(Instance); // Initial context assignment is done in a plain round-robin fashion. 
const u32 Index = atomic_fetch_add(&CurrentIndex, 1U, memory_order_relaxed); @@ -158,7 +172,10 @@ Instance->callPostInitCallback(); } - NOINLINE TSD *getTSDAndLockSlow(TSD *CurrentTSD) { + // TSDs is an array of locks which is not supported for marking thread-safety + // capability. + NOINLINE TSD *getTSDAndLockSlow(TSD *CurrentTSD) + EXCLUDES(MutexTSDs) { // Use the Precedence of the current TSD as our random seed. Since we are // in the slow path, it means that tryLock failed, and as a result it's // very likely that said Precedence is non-zero. @@ -202,10 +219,10 @@ } atomic_u32 CurrentIndex = {}; - u32 NumberOfTSDs = 0; - u32 NumberOfCoPrimes = 0; - u32 CoPrimes[TSDsArraySize] = {}; - bool Initialized = false; + u32 NumberOfTSDs GUARDED_BY(MutexTSDs) = 0; + u32 NumberOfCoPrimes GUARDED_BY(MutexTSDs) = 0; + u32 CoPrimes[TSDsArraySize] GUARDED_BY(MutexTSDs) = {}; + bool Initialized GUARDED_BY(Mutex) = false; HybridMutex Mutex; HybridMutex MutexTSDs; TSD TSDs[TSDsArraySize]; diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -238,7 +238,10 @@ if (size < max_size) sizes[size]++; }; + + SCUDO_ALLOCATOR.disable(); SCUDO_ALLOCATOR.iterateOverChunks(0, -1ul, callback, sizes); + SCUDO_ALLOCATOR.enable(); fputs("\n", stream); for (scudo::uptr i = 0; i != max_size; ++i)
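
Note for reviewers unfamiliar with Clang's -Wthread-safety (enabled above via -Werror=thread-safety for Clang builds): the macros introduced in thread_annotations.h compose roughly as in the sketch below. It assumes the patch's HybridMutex, ScopedLock and macro definitions; the class Counter and its members M and Value are illustrative names only, not part of this change.

#include "mutex.h"              // HybridMutex, ScopedLock (as annotated above).
#include "thread_annotations.h" // GUARDED_BY, REQUIRES, EXCLUDES, ...

namespace scudo {

class Counter {
public:
  // EXCLUDES(M) documents that callers must not already hold M. The
  // ScopedLock is accepted because the class is SCOPED_CAPABILITY with
  // ACQUIRE(M) on its constructor and RELEASE() on its destructor.
  void inc() EXCLUDES(M) {
    ScopedLock L(M);
    Value++; // OK: the analysis sees M as held here.
  }

  // REQUIRES(M) shifts the locking duty to the caller; calling get()
  // without holding M is a -Wthread-safety error.
  uptr get() REQUIRES(M) { return Value; }

private:
  HybridMutex M;
  uptr Value GUARDED_BY(M) = 0; // Any unguarded access is diagnosed.
};

} // namespace scudo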
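
The assertHeld()/ASSERT_CAPABILITY pairing added to HybridMutex covers the paths where the lock is taken far from the use, e.g. the iterateOverBlocks() methods in primary32.h, primary64.h and MapAllocator, which rely on the caller having called disable() first. A minimal sketch of that pattern, assuming the patch's headers; Walker, LastSeen and walk() are hypothetical names.

#include "mutex.h"
#include "thread_annotations.h"

namespace scudo {

class Walker {
public:
  // Lock/unlock happen on a separate path (cf. Allocator::disable()).
  void disable() ACQUIRE(M) { M.lock(); }
  void enable() RELEASE(M) { M.unlock(); }

  // The analysis cannot prove that disable() ran first, so the capability
  // is asserted instead; with SCUDO_DEBUG, assertHeld() also verifies it
  // at runtime through assertHeldImpl().
  void walk() {
    M.assertHeld();
    LastSeen++; // OK after the assertion.
  }

private:
  HybridMutex M;
  uptr LastSeen GUARDED_BY(M) = 0;
};

} // namespace scudo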
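
NO_THREAD_SAFETY_ANALYSIS is used above where locking is conditional (getTSDAndLock(&UnlockRequired)) or iterates over an array of mutexes, shapes the analysis cannot express. A sketch of the conditional case; maybeLock() is a hypothetical helper, not part of the patch.

#include "mutex.h"
#include "thread_annotations.h"

namespace scudo {

// Whether M is held on return depends on runtime state, so neither
// ACQUIRE() nor TRY_ACQUIRE() describes this function; the analysis is
// switched off for its body and callers rely on *UnlockRequired.
inline void maybeLock(HybridMutex &M, bool *UnlockRequired)
    NO_THREAD_SAFETY_ANALYSIS {
  // Only lock when contention is possible; the analysis cannot model a
  // conditionally held capability, hence the opt-out above.
  *UnlockRequired = getNumberOfCPUs() > 1U;
  if (*UnlockRequired)
    M.lock();
}

} // namespace scudo

Callers of this shape conditionally unlock afterwards (if (UnlockRequired) TSD->unlock();), which is why the functions consuming the flag, such as allocate() and quarantineOrDeallocateChunk() in combined.h, are themselves marked NO_THREAD_SAFETY_ANALYSIS.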
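
The GlobalQuarantine::drain() change above is a small but representative pattern: the guarded state (Cache.getSize()) is now sampled inside the CacheMutex critical section and the decision is acted on outside it, instead of reading the guarded member unlocked. A sketch of the shape, under the same assumptions as the previous examples; Drainer, Pending, Threshold and flush() are illustrative names.

#include "mutex.h"
#include "thread_annotations.h"

namespace scudo {

class Drainer {
public:
  void drain() EXCLUDES(M) {
    bool NeedFlush = false;
    {
      ScopedLock L(M);
      Pending++;
      // Sample the guarded state while the lock is held...
      NeedFlush = Pending > Threshold;
    }
    // ...and act on the local copy outside the critical section.
    if (NeedFlush)
      flush();
  }

private:
  void flush() {}
  static constexpr uptr Threshold = 1024U;
  HybridMutex M;
  uptr Pending GUARDED_BY(M) = 0;
};

} // namespace scudo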