diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h
--- a/compiler-rt/lib/scudo/standalone/allocator_config.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.h
@@ -48,9 +48,10 @@
   typedef SizeClassAllocator32 Primary;
 #endif
   // Cache blocks up to 2MB
-  typedef MapAllocator> Secondary;
+  typedef MapAllocator>
+      Secondary;
   template
-  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 2 TSDs.
+  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs.
 };
 
 struct AndroidSvelteConfig {
@@ -62,9 +63,9 @@
   // 64KB regions
   typedef SizeClassAllocator32 Primary;
 #endif
-  typedef MapAllocator> Secondary;
+  typedef MapAllocator> Secondary;
   template
-  using TSDRegistryT = TSDRegistrySharedT; // Shared, only 1 TSD.
+  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 2 TSDs.
 };
 
 #if SCUDO_CAN_USE_PRIMARY64
@@ -73,7 +74,7 @@
   typedef SizeClassAllocator64 Primary;
   typedef MapAllocator Secondary;
   template
-  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs.
+  using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs.
 };
 #endif

diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -41,8 +41,6 @@
 
 namespace scudo {
 
-enum class Option { ReleaseInterval, MemtagTuning };
-
 template class Allocator {
 public:
@@ -277,7 +275,7 @@
   }
 #endif // GWP_ASAN_HOOKS
 
-    FillContentsMode FillContents =
+    const FillContentsMode FillContents =
         ZeroContents ? ZeroFill : Options.FillContents;
 
     if (UNLIKELY(Alignment > MaxAlignment)) {
@@ -285,7 +283,7 @@
         return nullptr;
       reportAlignmentTooBig(Alignment, MaxAlignment);
     }
-    if (Alignment < MinAlignment)
+    if (UNLIKELY(Alignment < MinAlignment))
       Alignment = MinAlignment;
 
     // If the requested size happens to be 0 (more common than you might think),
@@ -322,13 +320,11 @@
       if (UNLIKELY(!Block)) {
         while (ClassId < SizeClassMap::LargestClassId) {
           Block = TSD->Cache.allocate(++ClassId);
-          if (LIKELY(Block)) {
+          if (LIKELY(Block))
             break;
-          }
         }
-        if (UNLIKELY(!Block)) {
+        if (UNLIKELY(!Block))
           ClassId = 0;
-        }
       }
       if (UnlockRequired)
         TSD->unlock();
@@ -349,7 +345,7 @@
     void *Ptr = reinterpret_cast<void *>(UserPtr);
     void *TaggedPtr = Ptr;
-    if (ClassId) {
+    if (LIKELY(ClassId)) {
       // We only need to zero or tag the contents for Primary backed
       // allocations. We only set tags for primary allocations in order to avoid
       // faulting potentially large numbers of pages for large secondary
@@ -692,11 +688,7 @@
   }
 
   bool setOption(Option O, sptr Value) {
-    if (O == Option::ReleaseInterval) {
-      Primary.setReleaseToOsIntervalMs(static_cast<s32>(Value));
-      Secondary.setReleaseToOsIntervalMs(static_cast<s32>(Value));
-      return true;
-    }
+    initThreadMaybe();
     if (O == Option::MemtagTuning) {
       // Enabling odd/even tags involves a tradeoff between use-after-free
       // detection and buffer overflow detection. Odd/even tags make it more
      // use-after-free is less likely to be detected because the tag space for
      // any particular chunk is cut in half. Therefore we use this tuning
      // setting to control whether odd/even tags are enabled.
-      if (Value == M_MEMTAG_TUNING_BUFFER_OVERFLOW) {
+      if (Value == M_MEMTAG_TUNING_BUFFER_OVERFLOW)
         Options.UseOddEvenTags = true;
-        return true;
-      }
-      if (Value == M_MEMTAG_TUNING_UAF) {
+      else if (Value == M_MEMTAG_TUNING_UAF)
         Options.UseOddEvenTags = false;
-        return true;
-      }
+      return true;
+    } else {
+      // We leave it to the various sub-components to decide whether or not
+      // they want to handle the option, but we do not want to short-circuit
+      // execution if one of the setOption calls were to return false.
+      const bool PrimaryResult = Primary.setOption(O, Value);
+      const bool SecondaryResult = Secondary.setOption(O, Value);
+      const bool RegistryResult = TSDRegistry.setOption(O, Value);
+      return PrimaryResult && SecondaryResult && RegistryResult;
     }
     return false;
   }
@@ -805,8 +802,7 @@
       PrimaryT::findNearestBlock(RegionInfoPtr, UntaggedFaultAddr);
 
   auto GetGranule = [&](uptr Addr, const char **Data, uint8_t *Tag) -> bool {
-    if (Addr < MemoryAddr ||
-        Addr + archMemoryTagGranuleSize() < Addr ||
+    if (Addr < MemoryAddr || Addr + archMemoryTagGranuleSize() < Addr ||
         Addr + archMemoryTagGranuleSize() > MemoryAddr + MemorySize)
       return false;
     *Data = &Memory[Addr - MemoryAddr];
@@ -950,10 +946,10 @@
   u32 Cookie;
 
   struct {
-    u8 MayReturnNull : 1;       // may_return_null
+    u8 MayReturnNull : 1;              // may_return_null
     FillContentsMode FillContents : 2; // zero_contents, pattern_fill_contents
-    u8 DeallocTypeMismatch : 1; // dealloc_type_mismatch
-    u8 DeleteSizeMismatch : 1;  // delete_size_mismatch
+    u8 DeallocTypeMismatch : 1;        // dealloc_type_mismatch
+    u8 DeleteSizeMismatch : 1;         // delete_size_mismatch
     u8 TrackAllocationStacks : 1;
     u8 UseOddEvenTags : 1;
     u32 QuarantineMaxChunkSize; // quarantine_max_chunk_size
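Note: in the setOption dispatch above, the three results are deliberately
stored in locals rather than written as a single
`return Primary.setOption(...) && Secondary.setOption(...) && ...`, which
would stop at the first failure. A minimal standalone sketch of the
difference, using two hypothetical sub-components (illustrative only, not
part of the patch):

    #include <cassert>

    struct Sub {
      bool Called = false;
      bool setOption(bool Result) { // Stand-in for a sub-component setOption.
        Called = true;
        return Result;
      }
    };

    int main() {
      Sub A, B;
      // Short-circuiting: when A rejects the option, B never sees it.
      const bool R1 = A.setOption(false) && B.setOption(true);
      assert(!R1 && !B.Called);
      // The patch's pattern: every sub-component sees the option first, and
      // the results are only combined afterwards.
      B.Called = false;
      const bool AResult = A.setOption(false);
      const bool BResult = B.setOption(true);
      assert(!(AResult && BResult) && B.Called);
      return 0;
    }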
diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h
--- a/compiler-rt/lib/scudo/standalone/common.h
+++ b/compiler-rt/lib/scudo/standalone/common.h
@@ -182,6 +182,14 @@
   uptr RegionEnd;
 };
 
+enum class Option : u8 {
+  ReleaseInterval,      // Release to OS interval in milliseconds.
+  MemtagTuning,         // Whether to tune tagging for UAF or overflow.
+  MaxCacheEntriesCount, // Maximum number of blocks that can be cached.
+  MaxCacheEntrySize,    // Maximum size of a block that can be cached.
+  MaxTSDsCount,         // Number of usable TSDs for the shared registry.
+};
+
 constexpr unsigned char PatternFillByte = 0xAB;
 
 enum FillContentsMode {

diff --git a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
--- a/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
+++ b/compiler-rt/lib/scudo/standalone/include/scudo/interface.h
@@ -121,6 +121,18 @@
 #define M_MEMTAG_TUNING -102
 #endif
 
+#ifndef M_CACHE_COUNT_MAX
+#define M_CACHE_COUNT_MAX -200
+#endif
+
+#ifndef M_CACHE_SIZE_MAX
+#define M_CACHE_SIZE_MAX -201
+#endif
+
+#ifndef M_TSDS_COUNT_MAX
+#define M_TSDS_COUNT_MAX -202
+#endif
+
 enum scudo_memtag_tuning {
   // Tune for buffer overflows.
   M_MEMTAG_TUNING_BUFFER_OVERFLOW,
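Note: the M_CACHE_COUNT_MAX, M_CACHE_SIZE_MAX and M_TSDS_COUNT_MAX constants
mirror the scudo::Option values added to common.h above. A sketch of how a
mallopt(3)-style entry point could forward them, assuming a global combined
`Allocator` instance; this is an illustration of the mapping, not the actual
scudo wrappers_c implementation:

    // Hypothetical wrapper; the name and the Allocator global are assumed.
    extern "C" int sketch_mallopt(int Param, int Value) {
      scudo::Option O;
      switch (Param) {
      case M_CACHE_COUNT_MAX:
        O = scudo::Option::MaxCacheEntriesCount;
        break;
      case M_CACHE_SIZE_MAX:
        O = scudo::Option::MaxCacheEntrySize;
        break;
      case M_TSDS_COUNT_MAX:
        O = scudo::Option::MaxTSDsCount;
        break;
      default:
        return 0; // Unknown parameter: report failure, mallopt-style.
      }
      return Allocator.setOption(O, static_cast<scudo::sptr>(Value)) ? 1 : 0;
    }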
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -86,7 +86,7 @@
       if (Sci->CanRelease)
         Sci->ReleaseInfo.LastReleaseAtNs = Time;
     }
-    setReleaseToOsIntervalMs(ReleaseToOsInterval);
+    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
   void init(s32 ReleaseToOsInterval) {
     memset(this, 0, sizeof(*this));
@@ -184,13 +184,16 @@
     getStats(Str, I, 0);
   }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    if (Interval >= MaxReleaseToOsIntervalMs) {
-      Interval = MaxReleaseToOsIntervalMs;
-    } else if (Interval <= MinReleaseToOsIntervalMs) {
-      Interval = MinReleaseToOsIntervalMs;
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::ReleaseInterval) {
+      const s32 Interval =
+          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
+              MinReleaseToOsIntervalMs);
+      atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+      return true;
     }
-    atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+    // Not supported by the Primary, but not an error either.
+    return true;
   }
 
   uptr releaseToOS() {
@@ -423,10 +426,6 @@
                 AvailableChunks, Rss >> 10, Sci->ReleaseInfo.RangesReleased);
   }
 
-  s32 getReleaseToOsIntervalMs() {
-    return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
-  }
-
   NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId,
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
@@ -457,7 +456,8 @@
     }
 
     if (!Force) {
-      const s32 IntervalMs = getReleaseToOsIntervalMs();
+      const s32 IntervalMs =
+          atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
       if (IntervalMs < 0)
         return 0;
       if (Sci->ReleaseInfo.LastReleaseAtNs +

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -91,7 +91,7 @@
       if (Region->CanRelease)
         Region->ReleaseInfo.LastReleaseAtNs = Time;
     }
-    setReleaseToOsIntervalMs(ReleaseToOsInterval);
+    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
 
     if (SupportsMemoryTagging)
       UseMemoryTagging = systemSupportsMemoryTagging();
@@ -185,13 +185,16 @@
     getStats(Str, I, 0);
   }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    if (Interval >= MaxReleaseToOsIntervalMs) {
-      Interval = MaxReleaseToOsIntervalMs;
-    } else if (Interval <= MinReleaseToOsIntervalMs) {
-      Interval = MinReleaseToOsIntervalMs;
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::ReleaseInterval) {
+      const s32 Interval =
+          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
+              MinReleaseToOsIntervalMs);
+      atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+      return true;
     }
-    atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+    // Not supported by the Primary, but not an error either.
+    return true;
   }
 
   uptr releaseToOS() {
@@ -435,10 +438,6 @@
                 getRegionBaseByClassId(ClassId));
   }
 
-  s32 getReleaseToOsIntervalMs() {
-    return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
-  }
-
   NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
                                  bool Force = false) {
     const uptr BlockSize = getSizeByClassId(ClassId);
@@ -469,7 +468,8 @@
     }
 
     if (!Force) {
-      const s32 IntervalMs = getReleaseToOsIntervalMs();
+      const s32 IntervalMs =
+          atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
       if (IntervalMs < 0)
         return 0;
       if (Region->ReleaseInfo.LastReleaseAtNs +

diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -56,14 +56,21 @@
     return false;
   }
   bool store(UNUSED LargeBlock::Header *H) { return false; }
-  static bool canCache(UNUSED uptr Size) { return false; }
+  bool canCache(UNUSED uptr Size) { return false; }
   void disable() {}
   void enable() {}
   void releaseToOS() {}
-  void setReleaseToOsIntervalMs(UNUSED s32 Interval) {}
+  bool setOption(Option O, UNUSED sptr Value) {
+    if (O == Option::ReleaseInterval || O == Option::MaxCacheEntriesCount ||
+        O == Option::MaxCacheEntrySize)
+      return false;
+    // Not supported by the Secondary Cache, but not an error either.
+    return true;
+  }
 };
 
-template
+template
 class MapAllocatorCache {
@@ -71,10 +78,17 @@
   // Fuchsia doesn't allow releasing Secondary blocks yet. Note that 0 length
   // arrays are an extension for some compilers.
   // FIXME(kostyak): support (partially) the cache on Fuchsia.
-  static_assert(!SCUDO_FUCHSIA || MaxEntriesCount == 0U, "");
+  static_assert(!SCUDO_FUCHSIA || EntriesArraySize == 0U, "");
+
+  // Ensure the default maximum specified fits the array.
+  static_assert(DefaultMaxEntriesCount <= EntriesArraySize, "");
 
   void initLinkerInitialized(s32 ReleaseToOsInterval) {
-    setReleaseToOsIntervalMs(ReleaseToOsInterval);
+    setOption(Option::MaxCacheEntriesCount,
+              static_cast<sptr>(DefaultMaxEntriesCount));
+    setOption(Option::MaxCacheEntrySize,
+              static_cast<sptr>(DefaultMaxEntrySize));
+    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
   void init(s32 ReleaseToOsInterval) {
     memset(this, 0, sizeof(*this));
@@ -85,13 +99,14 @@
     bool EntryCached = false;
     bool EmptyCache = false;
     const u64 Time = getMonotonicTime();
+    const u32 MaxCount = atomic_load(&MaxEntriesCount, memory_order_relaxed);
     {
       ScopedLock L(Mutex);
-      if (EntriesCount == MaxEntriesCount) {
+      if (EntriesCount >= MaxCount) {
         if (IsFullEvents++ == 4U)
           EmptyCache = true;
       } else {
-        for (uptr I = 0; I < MaxEntriesCount; I++) {
+        for (u32 I = 0; I < MaxCount; I++) {
           if (Entries[I].Block)
             continue;
           if (I != 0)
@@ -111,17 +126,19 @@
     s32 Interval;
     if (EmptyCache)
       empty();
-    else if ((Interval = getReleaseToOsIntervalMs()) >= 0)
+    else if ((Interval = atomic_load(&ReleaseToOsIntervalMs,
+                                     memory_order_relaxed)) >= 0)
       releaseOlderThan(Time - static_cast<u64>(Interval) * 1000000);
     return EntryCached;
   }
 
   bool retrieve(uptr Size, LargeBlock::Header **H) {
     const uptr PageSize = getPageSizeCached();
+    const u32 MaxCount = atomic_load(&MaxEntriesCount, memory_order_relaxed);
     ScopedLock L(Mutex);
     if (EntriesCount == 0)
       return false;
-    for (uptr I = 0; I < MaxEntriesCount; I++) {
+    for (u32 I = 0; I < MaxCount; I++) {
       if (!Entries[I].Block)
         continue;
       const uptr BlockSize = Entries[I].BlockEnd - Entries[I].Block;
@@ -141,17 +158,31 @@
     return false;
   }
 
-  static bool canCache(uptr Size) {
-    return MaxEntriesCount != 0U && Size <= MaxEntrySize;
+  bool canCache(uptr Size) {
+    return atomic_load(&MaxEntriesCount, memory_order_relaxed) != 0U &&
+           Size <= atomic_load(&MaxEntrySize, memory_order_relaxed);
   }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    if (Interval >= MaxReleaseToOsIntervalMs) {
-      Interval = MaxReleaseToOsIntervalMs;
-    } else if (Interval <= MinReleaseToOsIntervalMs) {
-      Interval = MinReleaseToOsIntervalMs;
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::ReleaseInterval) {
+      const s32 Interval =
+          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
+              MinReleaseToOsIntervalMs);
+      atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+      return true;
+    } else if (O == Option::MaxCacheEntriesCount) {
+      const u32 MaxCount = static_cast<u32>(Value);
+      if (MaxCount > EntriesArraySize)
+        return false;
+      atomic_store(&MaxEntriesCount, MaxCount, memory_order_relaxed);
+      return true;
+    } else if (O == Option::MaxCacheEntrySize) {
+      atomic_store(&MaxEntrySize, static_cast<uptr>(Value),
+                   memory_order_relaxed);
+      return true;
    }
-    atomic_store(&ReleaseToOsIntervalMs, Interval, memory_order_relaxed);
+    // Not supported by the Secondary Cache, but not an error either.
+    return true;
  }
 
   void releaseToOS() { releaseOlderThan(UINT64_MAX); }
@@ -166,11 +197,11 @@
       void *MapBase;
       uptr MapSize;
       MapPlatformData Data;
-    } MapInfo[MaxEntriesCount];
+    } MapInfo[EntriesArraySize];
     uptr N = 0;
     {
       ScopedLock L(Mutex);
-      for (uptr I = 0; I < MaxEntriesCount; I++) {
+      for (uptr I = 0; I < EntriesArraySize; I++) {
         if (!Entries[I].Block)
           continue;
         MapInfo[N].MapBase = reinterpret_cast<void *>(Entries[I].MapBase);
@@ -191,7 +222,7 @@
     ScopedLock L(Mutex);
     if (!EntriesCount)
       return;
-    for (uptr I = 0; I < MaxEntriesCount; I++) {
+    for (uptr I = 0; I < EntriesArraySize; I++) {
       if (!Entries[I].Block || !Entries[I].Time || Entries[I].Time > Time)
         continue;
       releasePagesToOS(Entries[I].Block, 0,
     }
   }
@@ -201,10 +232,6 @@
-  s32 getReleaseToOsIntervalMs() {
-    return atomic_load(&ReleaseToOsIntervalMs, memory_order_relaxed);
-  }
-
   struct CachedBlock {
     uptr Block;
     uptr BlockEnd;
@@ -215,8 +242,10 @@
   };
 
   HybridMutex Mutex;
-  CachedBlock Entries[MaxEntriesCount];
+  CachedBlock Entries[EntriesArraySize];
   u32 EntriesCount;
+  atomic_u32 MaxEntriesCount;
+  atomic_uptr MaxEntrySize;
   uptr LargestSize;
   u32 IsFullEvents;
   atomic_s32 ReleaseToOsIntervalMs;
@@ -265,11 +294,9 @@
         Callback(reinterpret_cast(&H) + LargeBlock::getHeaderSize());
   }
 
-  static uptr canCache(uptr Size) { return CacheT::canCache(Size); }
+  uptr canCache(uptr Size) { return Cache.canCache(Size); }
 
-  void setReleaseToOsIntervalMs(s32 Interval) {
-    Cache.setReleaseToOsIntervalMs(Interval);
-  }
+  bool setOption(Option O, sptr Value) { return Cache.setOption(O, Value); }
 
   void releaseToOS() { Cache.releaseToOS(); }
 
@@ -306,7 +333,7 @@
   const uptr RoundedSize =
       roundUpTo(Size + LargeBlock::getHeaderSize(), PageSize);
 
-  if (AlignmentHint < PageSize && CacheT::canCache(RoundedSize)) {
+  if (AlignmentHint < PageSize && Cache.canCache(RoundedSize)) {
     LargeBlock::Header *H;
     if (Cache.retrieve(RoundedSize, &H)) {
       if (BlockEnd)
@@ -400,7 +427,7 @@
     Stats.sub(StatAllocated, CommitSize);
     Stats.sub(StatMapped, H->MapSize);
   }
-  if (CacheT::canCache(CommitSize) && Cache.store(H))
+  if (Cache.canCache(CommitSize) && Cache.store(H))
     return;
   void *Addr = reinterpret_cast<void *>(H->MapBase);
   const uptr Size = H->MapSize;

diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -19,7 +19,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc;
 
@@ -351,6 +351,7 @@
 }
 
 template static void testAllocatorThreaded() {
+  Ready = false;
   using AllocatorT = TestAllocator;
   auto Allocator = std::unique_ptr(new AllocatorT());
   std::thread Threads[32];
@@ -394,7 +395,7 @@
   typedef scudo::SizeClassAllocator64 Primary;
   typedef scudo::MapAllocator Secondary;
-  template using TSDRegistryT = scudo::TSDRegistrySharedT;
+  template using TSDRegistryT = scudo::TSDRegistrySharedT;
 };
 
 TEST(ScudoCombinedTest, DeathCombined) {

diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -149,7 +149,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 template static void performAllocations(Primary *Allocator) {
   static THREADLOCAL typename Primary::CacheT Cache;
@@ -176,6 +176,7 @@
 }
 
 template static void testPrimaryThreaded() {
+  Ready = false;
   auto Deleter = [](Primary *P) {
     P->unmapTestOnly();
     delete P;

diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -21,7 +21,7 @@
 template static void testSecondaryBasic(void) {
   scudo::GlobalStats S;
   S.init();
-  SecondaryT *L = new SecondaryT;
+  std::unique_ptr L(new SecondaryT);
   L->init(&S);
   const scudo::uptr Size = 1U << 16;
   void *P = L->allocate(Size);
@@ -30,7 +30,7 @@
   EXPECT_GE(SecondaryT::getBlockSize(P), Size);
   L->deallocate(P);
   // If the Secondary can't cache that pointer, it will be unmapped.
-  if (!SecondaryT::canCache(Size))
+  if (!L->canCache(Size))
     EXPECT_DEATH(memset(P, 'A', Size), "");
 
   const scudo::uptr Align = 1U << 16;
@@ -59,7 +59,7 @@
 #if !SCUDO_FUCHSIA
   testSecondaryBasic>>();
   testSecondaryBasic<
-      scudo::MapAllocator>>();
+      scudo::MapAllocator>>();
 #endif
 }
@@ -75,7 +75,7 @@
 TEST(ScudoSecondaryTest, SecondaryCombinations) {
   constexpr scudo::uptr MinAlign = FIRST_32_SECOND_64(8, 16);
   constexpr scudo::uptr HeaderSize = scudo::roundUpTo(8, MinAlign);
-  LargeAllocator *L = new LargeAllocator;
+  std::unique_ptr L(new LargeAllocator);
   L->init(nullptr);
   for (scudo::uptr SizeLog = 0; SizeLog <= 20; SizeLog++) {
     for (scudo::uptr AlignLog = FIRST_32_SECOND_64(3, 4); AlignLog <= 16;
@@ -103,7 +103,7 @@
 }
 
 TEST(ScudoSecondaryTest, SecondaryIterate) {
-  LargeAllocator *L = new LargeAllocator;
+  std::unique_ptr L(new LargeAllocator);
   L->init(nullptr);
   std::vector V;
   const scudo::uptr PageSize = scudo::getPageSizeCached();
@@ -125,9 +125,32 @@
   Str.output();
 }
 
+TEST(ScudoSecondaryTest, SecondaryOptions) {
+  std::unique_ptr L(new LargeAllocator);
+  L->init(nullptr);
+  // Attempt to set a maximum number of entries higher than the array size.
+  EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4096U));
+  // A negative number will be cast to a scudo::u32 and fail.
+  EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, -1));
+  if (L->canCache(0U)) {
+    // Various valid combinations.
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
+    EXPECT_TRUE(L->canCache(1UL << 18));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 17));
+    EXPECT_FALSE(L->canCache(1UL << 18));
+    EXPECT_TRUE(L->canCache(1UL << 16));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 0U));
+    EXPECT_FALSE(L->canCache(1UL << 16));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
+    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
+    EXPECT_TRUE(L->canCache(1UL << 16));
+  }
+}
+
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 static void performAllocations(LargeAllocator *L) {
   std::vector V;
@@ -153,11 +176,12 @@
 }
 
 TEST(ScudoSecondaryTest, SecondaryThreadsRace) {
-  LargeAllocator *L = new LargeAllocator;
+  Ready = false;
+  std::unique_ptr L(new LargeAllocator);
   L->init(nullptr, /*ReleaseToOsInterval=*/0);
   std::thread Threads[16];
   for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
-    Threads[I] = std::thread(performAllocations, L);
+    Threads[I] = std::thread(performAllocations, L.get());
   {
     std::unique_lock Lock(Mutex);
     Ready = true;

diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
@@ -13,6 +13,7 @@
 
 #include <condition_variable>
 #include <mutex>
+#include <set>
 #include <thread>
 
 // We mock out an allocator with a TSD registry, mostly using empty stubs. The
@@ -47,12 +48,12 @@
 
 struct OneCache {
   template
-  using TSDRegistryT = scudo::TSDRegistrySharedT;
+  using TSDRegistryT = scudo::TSDRegistrySharedT;
 };
 
 struct SharedCaches {
   template
-  using TSDRegistryT = scudo::TSDRegistrySharedT;
+  using TSDRegistryT = scudo::TSDRegistrySharedT;
 };
 
 struct ExclusiveCaches {
@@ -116,7 +117,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 template static void stressCache(AllocatorT *Allocator) {
   auto Registry = Allocator->getTSDRegistry();
@@ -145,6 +146,7 @@
 }
 
 template static void testRegistryThreaded() {
+  Ready = false;
   auto Deleter = [](AllocatorT *A) {
     A->unmapTestOnly();
     delete A;
@@ -171,3 +173,73 @@
   testRegistryThreaded>();
 #endif
 }
+
+static std::set Pointers;
+
+static void stressSharedRegistry(MockAllocator *Allocator) {
+  std::set Set;
+  auto Registry = Allocator->getTSDRegistry();
+  {
+    std::unique_lock Lock(Mutex);
+    while (!Ready)
+      Cv.wait(Lock);
+  }
+  Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false);
+  bool UnlockRequired;
+  for (scudo::uptr I = 0; I < 4096U; I++) {
+    auto TSD = Registry->getTSDAndLock(&UnlockRequired);
+    EXPECT_NE(TSD, nullptr);
+    Set.insert(reinterpret_cast(TSD));
+    if (UnlockRequired)
+      TSD->unlock();
+  }
+  {
+    std::unique_lock Lock(Mutex);
+    Pointers.insert(Set.begin(), Set.end());
+  }
+}
+
+TEST(ScudoTSDTest, TSDRegistryTSDsCount) {
+  Ready = false;
+  using AllocatorT = MockAllocator;
+  auto Deleter = [](AllocatorT *A) {
+    A->unmapTestOnly();
+    delete A;
+  };
+  std::unique_ptr Allocator(new AllocatorT, Deleter);
+  Allocator->reset();
+  // We attempt to use as many TSDs as the shared cache offers by creating a
+  // decent amount of threads that will be run concurrently and attempt to get
+  // and lock TSDs. We put them all in a set and count the number of entries
+  // after we are done.
+  std::thread Threads[32];
+  for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
+    Threads[I] = std::thread(stressSharedRegistry, Allocator.get());
+  {
+    std::unique_lock Lock(Mutex);
+    Ready = true;
+    Cv.notify_all();
+  }
+  for (auto &T : Threads)
+    T.join();
+  // The initial number of TSDs we get will be the minimum of the default count
+  // and the number of CPUs.
+  EXPECT_LE(Pointers.size(), 8U);
+  Pointers.clear();
+  auto Registry = Allocator->getTSDRegistry();
+  // Increase the number of TSDs to 16.
+  Registry->setOption(scudo::Option::MaxTSDsCount, 16);
+  Ready = false;
+  for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
+    Threads[I] = std::thread(stressSharedRegistry, Allocator.get());
+  {
+    std::unique_lock Lock(Mutex);
+    Ready = true;
+    Cv.notify_all();
+  }
+  for (auto &T : Threads)
+    T.join();
+  // We should get 16 distinct TSDs back.
+  EXPECT_EQ(Pointers.size(), 16U);
+}

diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
@@ -389,6 +389,7 @@
 
 TEST(ScudoWrappersCTest, DisableForkEnable) {
   pthread_t ThreadId;
+  Ready = false;
   EXPECT_EQ(pthread_create(&ThreadId, nullptr, &enableMalloc, nullptr), 0);
 
   // Wait for the thread to be warmed up.

diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
@@ -79,7 +79,7 @@
 
 static std::mutex Mutex;
 static std::condition_variable Cv;
-static bool Ready = false;
+static bool Ready;
 
 static void stressNew() {
   std::vector V;
@@ -103,6 +103,7 @@
 }
 
 TEST(ScudoWrappersCppTest, ThreadedNew) {
+  Ready = false;
  std::thread Threads[32];
  for (size_t I = 0U; I < sizeof(Threads) / sizeof(Threads[0]); I++)
    Threads[I] = std::thread(stressNew);

diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -66,6 +66,12 @@
     Mutex.unlock();
   }
 
+  bool setOption(Option O, UNUSED sptr Value) {
+    if (O == Option::MaxTSDsCount)
+      return false;
+    return true;
+  }
+
 private:
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);

diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -14,31 +14,16 @@
 
 namespace scudo {
 
-template struct TSDRegistrySharedT {
+template
+struct TSDRegistrySharedT {
   void initLinkerInitialized(Allocator *Instance) {
     Instance->initLinkerInitialized();
     CHECK_EQ(pthread_key_create(&PThreadKey, nullptr), 0); // For non-TLS
-    const u32 NumberOfCPUs = getNumberOfCPUs();
-    NumberOfTSDs = (SCUDO_ANDROID || NumberOfCPUs == 0)
-                       ? MaxTSDCount
-                       : Min(NumberOfCPUs, MaxTSDCount);
-    for (u32 I = 0; I < NumberOfTSDs; I++)
+    for (u32 I = 0; I < TSDsArraySize; I++)
       TSDs[I].initLinkerInitialized(Instance);
-    // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
-    // array of TSDs in a random order. For details, see:
-    // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
-    for (u32 I = 0; I < NumberOfTSDs; I++) {
-      u32 A = I + 1;
-      u32 B = NumberOfTSDs;
-      // Find the GCD between I + 1 and NumberOfTSDs. If 1, they are coprimes.
-      while (B != 0) {
-        const u32 T = A;
-        A = B;
-        B = T % B;
-      }
-      if (A == 1)
-        CoPrimes[NumberOfCoPrimes++] = I + 1;
-    }
+    const u32 NumberOfCPUs = getNumberOfCPUs();
+    setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
+                                        : Min(NumberOfCPUs, DefaultTSDCount));
     Initialized = true;
   }
   void init(Allocator *Instance) {
@@ -66,21 +51,34 @@
     if (TSD->tryLock())
       return TSD;
     // If that fails, go down the slow path.
+    if (TSDsArraySize == 1U) {
+      // Only 1 TSD, no need to go any further.
+      // The compiler will optimize this one way or the other.
+      TSD->lock();
+      return TSD;
+    }
     return getTSDAndLockSlow(TSD);
   }
 
   void disable() {
     Mutex.lock();
-    for (u32 I = 0; I < NumberOfTSDs; I++)
+    for (u32 I = 0; I < TSDsArraySize; I++)
       TSDs[I].lock();
   }
 
   void enable() {
-    for (s32 I = static_cast<s32>(NumberOfTSDs - 1); I >= 0; I--)
+    for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--)
       TSDs[I].unlock();
     Mutex.unlock();
   }
 
+  bool setOption(Option O, sptr Value) {
+    if (O == Option::MaxTSDsCount)
+      return setNumberOfTSDs(static_cast<u32>(Value));
+    // Not supported by the TSD Registry, but not an error either.
+    return true;
+  }
+
 private:
   ALWAYS_INLINE void setCurrentTSD(TSD *CurrentTSD) {
 #if _BIONIC
@@ -104,6 +102,32 @@
 #endif
   }
 
+  bool setNumberOfTSDs(u32 N) {
+    ScopedLock L(MutexTSDs);
+    if (N < NumberOfTSDs)
+      return false;
+    if (N > TSDsArraySize)
+      N = TSDsArraySize;
+    NumberOfTSDs = N;
+    NumberOfCoPrimes = 0;
+    // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
+    // array of TSDs in a random order. For details, see:
+    // https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
+    for (u32 I = 0; I < N; I++) {
+      u32 A = I + 1;
+      u32 B = N;
+      // Find the GCD between I + 1 and N. If 1, they are coprimes.
+      while (B != 0) {
+        const u32 T = A;
+        A = B;
+        B = T % B;
+      }
+      if (A == 1)
+        CoPrimes[NumberOfCoPrimes++] = I + 1;
+    }
+    return true;
+  }
+
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
     if (LIKELY(Initialized))
@@ -120,17 +144,23 @@
   }
 
   NOINLINE TSD *getTSDAndLockSlow(TSD *CurrentTSD) {
-    if (MaxTSDCount > 1U && NumberOfTSDs > 1U) {
-      // Use the Precedence of the current TSD as our random seed. Since we are
-      // in the slow path, it means that tryLock failed, and as a result it's
-      // very likely that said Precedence is non-zero.
-      const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
-      const u32 Inc = CoPrimes[R % NumberOfCoPrimes];
-      u32 Index = R % NumberOfTSDs;
+    // Use the Precedence of the current TSD as our random seed. Since we are
+    // in the slow path, it means that tryLock failed, and as a result it's
+    // very likely that said Precedence is non-zero.
+    const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
+    u32 N, Inc;
+    {
+      ScopedLock L(MutexTSDs);
+      N = NumberOfTSDs;
+      DCHECK_NE(NumberOfCoPrimes, 0U);
+      Inc = CoPrimes[R % NumberOfCoPrimes];
+    }
+    if (N > 1U) {
+      u32 Index = R % N;
       uptr LowestPrecedence = UINTPTR_MAX;
       TSD *CandidateTSD = nullptr;
       // Go randomly through at most 4 contexts and find a candidate.
-      for (u32 I = 0; I < Min(4U, NumberOfTSDs); I++) {
+      for (u32 I = 0; I < Min(4U, N); I++) {
         if (TSDs[Index].tryLock()) {
           setCurrentTSD(&TSDs[Index]);
           return &TSDs[Index];
@@ -142,8 +172,8 @@
           LowestPrecedence = Precedence;
         }
         Index += Inc;
-        if (Index >= NumberOfTSDs)
-          Index -= NumberOfTSDs;
+        if (Index >= N)
+          Index -= N;
       }
       if (CandidateTSD) {
         CandidateTSD->lock();
@@ -160,19 +190,20 @@
   atomic_u32 CurrentIndex;
   u32 NumberOfTSDs;
   u32 NumberOfCoPrimes;
-  u32 CoPrimes[MaxTSDCount];
+  u32 CoPrimes[TSDsArraySize];
   bool Initialized;
   HybridMutex Mutex;
-  TSD TSDs[MaxTSDCount];
+  HybridMutex MutexTSDs;
+  TSD TSDs[TSDsArraySize];
 #if SCUDO_LINUX && !_BIONIC
   static THREADLOCAL TSD *ThreadTSD;
 #endif
 };
 
 #if SCUDO_LINUX && !_BIONIC
-template
+template
 THREADLOCAL TSD
-    *TSDRegistrySharedT::ThreadTSD;
+    *TSDRegistrySharedT::ThreadTSD;
 #endif
 
 } // namespace scudo
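Note on the coprime walk in getTSDAndLockSlow: stepping the index by a value
coprime with NumberOfTSDs (mod NumberOfTSDs) visits every TSD exactly once
before the sequence repeats, which is why setNumberOfTSDs precomputes the
coprimes of N. A standalone worked example (an illustration, not scudo code):

    #include <cstdio>

    int main() {
      const unsigned N = 8;   // Number of slots, e.g. NumberOfTSDs.
      const unsigned Inc = 3; // gcd(3, 8) == 1, so 3 is a valid increment.
      unsigned Index = 5;     // Arbitrary start, e.g. the seed modulo N.
      for (unsigned I = 0; I < N; I++) {
        std::printf("%u ", Index); // Prints "5 0 3 6 1 4 7 2": each slot once.
        Index += Inc;
        if (Index >= N)
          Index -= N;
      }
      std::printf("\n");
      return 0;
    }

An increment sharing a factor with N would only cycle over a subset instead
(e.g. Inc = 2 with N = 8 touches only the even slots), which is exactly what
the GCD check in setNumberOfTSDs rules out.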