diff --git a/compiler-rt/lib/scudo/standalone/bytemap.h b/compiler-rt/lib/scudo/standalone/bytemap.h
--- a/compiler-rt/lib/scudo/standalone/bytemap.h
+++ b/compiler-rt/lib/scudo/standalone/bytemap.h
@@ -34,6 +34,9 @@
     return Map[Index];
   }

+  void disable() {}
+  void enable() {}
+
 private:
   u8 *Map;
 };
@@ -82,6 +85,9 @@
     return Level2Map[Index % Level2Size];
   }

+  void disable() { Mutex.lock(); }
+  void enable() { Mutex.unlock(); }
+
 private:
   u8 *get(uptr Index) const {
     DCHECK_LT(Index, Level1Size);
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -31,15 +31,23 @@
 static gwp_asan::GuardedPoolAllocator GuardedAlloc;
 #endif // GWP_ASAN_HOOKS

+extern "C" inline void EmptyCallback() {}
+
 namespace scudo {

-template <class Params> class Allocator {
+template <class Params, void (*PostInitCallback)(void) = EmptyCallback>
+class Allocator {
 public:
   using PrimaryT = typename Params::Primary;
   using CacheT = typename PrimaryT::CacheT;
-  typedef Allocator<Params> ThisT;
+  typedef Allocator<Params, PostInitCallback> ThisT;
   typedef typename Params::template TSDRegistryT<ThisT> TSDRegistryT;

+  void callPostInitCallback() {
+    static pthread_once_t OnceControl = PTHREAD_ONCE_INIT;
+    pthread_once(&OnceControl, PostInitCallback);
+  }
+
   struct QuarantineCallback {
     explicit QuarantineCallback(ThisT &Instance, CacheT &LocalCache)
         : Allocator(Instance), Cache(LocalCache) {}
@@ -420,12 +428,18 @@
   void disable() {
     initThreadMaybe();
     TSDRegistry.disable();
+    Stats.disable();
+    Quarantine.disable();
+    Primary.disable();
     Secondary.disable();
   }

   void enable() {
     initThreadMaybe();
     Secondary.enable();
+    Primary.enable();
+    Quarantine.enable();
+    Stats.enable();
     TSDRegistry.enable();
   }
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -123,13 +123,26 @@
   }

   void disable() {
-    for (uptr I = 0; I < NumClasses; I++)
-      getSizeClassInfo(I)->Mutex.lock();
+    // The BatchClassId must be locked last since other classes can use it.
+    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
+      if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
+        continue;
+      getSizeClassInfo(static_cast<uptr>(I))->Mutex.lock();
+    }
+    getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.lock();
+    RegionsStashMutex.lock();
+    PossibleRegions.disable();
   }

   void enable() {
-    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--)
-      getSizeClassInfo(static_cast<uptr>(I))->Mutex.unlock();
+    PossibleRegions.enable();
+    RegionsStashMutex.unlock();
+    getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
+    for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
+      getSizeClassInfo(I)->Mutex.unlock();
+    }
   }

   template <typename F> void iterateOverBlocks(F Callback) {
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -125,13 +125,22 @@
   }

   void disable() {
-    for (uptr I = 0; I < NumClasses; I++)
-      getRegionInfo(I)->Mutex.lock();
+    // The BatchClassId must be locked last since other classes can use it.
+    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
+      if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
+        continue;
+      getRegionInfo(static_cast<uptr>(I))->Mutex.lock();
+    }
+    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock();
   }

   void enable() {
-    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--)
-      getRegionInfo(static_cast<uptr>(I))->Mutex.unlock();
+    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
+    for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
+      getRegionInfo(I)->Mutex.unlock();
+    }
   }

   template <typename F> void iterateOverBlocks(F Callback) const {
diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h
--- a/compiler-rt/lib/scudo/standalone/quarantine.h
+++ b/compiler-rt/lib/scudo/standalone/quarantine.h
@@ -205,7 +205,7 @@
       ScopedLock L(CacheMutex);
       Cache.transfer(C);
     }
-    if (Cache.getSize() > getMaxSize() && RecyleMutex.tryLock())
+    if (Cache.getSize() > getMaxSize() && RecycleMutex.tryLock())
       recycle(atomic_load_relaxed(&MinSize), Cb);
   }

@@ -214,7 +214,7 @@
       ScopedLock L(CacheMutex);
       Cache.transfer(C);
     }
-    RecyleMutex.lock();
+    RecycleMutex.lock();
     recycle(0, Cb);
   }

@@ -225,11 +225,22 @@
                 getMaxSize() >> 10, getCacheSize() >> 10);
   }

+  void disable() {
+    // RecycleMutex must be locked 1st since we grab CacheMutex within recycle.
+    RecycleMutex.lock();
+    CacheMutex.lock();
+  }
+
+  void enable() {
+    CacheMutex.unlock();
+    RecycleMutex.unlock();
+  }
+
 private:
   // Read-only data.
   alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex;
   CacheT Cache;
-  alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecyleMutex;
+  alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex;
   atomic_uptr MinSize;
   atomic_uptr MaxSize;
   alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize;
@@ -261,7 +272,7 @@
       while (Cache.getSize() > MinSize)
         Tmp.enqueueBatch(Cache.dequeueBatch());
     }
-    RecyleMutex.unlock();
+    RecycleMutex.unlock();
     doRecycle(&Tmp, Cb);
   }
diff --git a/compiler-rt/lib/scudo/standalone/stats.h b/compiler-rt/lib/scudo/standalone/stats.h
--- a/compiler-rt/lib/scudo/standalone/stats.h
+++ b/compiler-rt/lib/scudo/standalone/stats.h
@@ -87,6 +87,9 @@
       S[I] = static_cast<sptr>(S[I]) >= 0 ? S[I] : 0;
   }

+  void disable() { Mutex.lock(); }
+  void enable() { Mutex.unlock(); }
+
 private:
   mutable HybridMutex Mutex;
   DoublyLinkedList<LocalStats> StatsList;
diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
@@ -36,6 +36,7 @@
   void initCache(CacheT *Cache) { memset(Cache, 0, sizeof(*Cache)); }
   void commitBack(scudo::TSD<MockAllocator> *TSD) {}
   TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
+  void callPostInitCallback() {}

   bool isInitialized() { return Initialized; }
diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
@@ -299,7 +299,9 @@
                "");
 }

+// Fuchsia doesn't have fork or malloc_info.
 #if !SCUDO_FUCHSIA
+
 TEST(ScudoWrappersCTest, MallocInfo) {
   char Buffer[64];
   FILE *F = fmemopen(Buffer, sizeof(Buffer), "w+");
@@ -310,4 +312,79 @@
   fclose(F);
   EXPECT_EQ(strncmp(Buffer, "<malloc version=\"scudo-", 23), 0);
 }
diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
 #include
 #include
 #include
@@ -113,3 +114,59 @@
   for (auto &T : Threads)
     T.join();
 }
+
+#if !SCUDO_FUCHSIA
+// TODO(kostyak): for me, this test fails in a specific configuration when ran
+//                by itself with some Scudo or GWP-ASan violation. Other people
+//                can't seem to reproduce the failure. Consider skipping this in
+//                the event it fails on the upstream bots.
+TEST(ScudoWrappersCppTest, AllocAfterFork) {
+  std::atomic_bool Stop;
+
+  // Create threads that simply allocate and free different sizes.
+  std::vector<std::thread *> Threads;
+  for (size_t N = 0; N < 5; N++) {
+    std::thread *T = new std::thread([&Stop] {
+      while (!Stop) {
+        for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
+          char *P = new char[1UL << SizeLog];
+          EXPECT_NE(P, nullptr);
+          // Make sure this value is not optimized away.
+          asm volatile("" : : "r,m"(P) : "memory");
+          delete[] P;
+        }
+      }
+    });
+    Threads.push_back(T);
+  }
+
+  // Create a thread to fork and allocate.
+  for (size_t N = 0; N < 100; N++) {
+    pid_t Pid;
+    if ((Pid = fork()) == 0) {
+      for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
+        char *P = new char[1UL << SizeLog];
+        EXPECT_NE(P, nullptr);
+        // Make sure this value is not optimized away.
+        asm volatile("" : : "r,m"(P) : "memory");
+        // Make sure we can touch all of the allocation.
+        memset(P, 0x32, 1U << SizeLog);
+        // EXPECT_LE(1U << SizeLog, malloc_usable_size(ptr));
+        delete[] P;
+      }
+      _exit(10);
+    }
+    EXPECT_NE(-1, Pid);
+    int Status;
+    EXPECT_EQ(Pid, waitpid(Pid, &Status, 0));
+    EXPECT_FALSE(WIFSIGNALED(Status));
+    EXPECT_EQ(10, WEXITSTATUS(Status));
+  }
+
+  printf("Waiting for threads to complete\n");
+  Stop = true;
+  for (auto Thread : Threads)
+    Thread->join();
+  Threads.clear();
+}
+#endif
diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h
--- a/compiler-rt/lib/scudo/standalone/tsd.h
+++ b/compiler-rt/lib/scudo/standalone/tsd.h
@@ -14,6 +14,7 @@
 #include "mutex.h"

 #include <limits.h> // for PTHREAD_DESTRUCTOR_ITERATIONS
+#include <pthread.h>

 // With some build setups, this might still not be defined.
 #ifndef PTHREAD_DESTRUCTOR_ITERATIONS
diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -11,8 +11,6 @@

 #include "tsd.h"

-#include <pthread.h>
-
 namespace scudo {

 enum class ThreadState : u8 {
@@ -62,6 +60,7 @@
   // To disable the exclusive TSD registry, we effectively lock the fallback TSD
   // and force all threads to attempt to use it instead of their local one.
   void disable() {
+    Mutex.lock();
     FallbackTSD->lock();
     atomic_store(&Disabled, 1U, memory_order_release);
   }
@@ -69,6 +68,7 @@
   void enable() {
     atomic_store(&Disabled, 0U, memory_order_release);
     FallbackTSD->unlock();
+    Mutex.unlock();
   }

 private:
@@ -90,6 +90,7 @@
         pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0);
     ThreadTSD.initLinkerInitialized(Instance);
     State = ThreadState::Initialized;
+    Instance->callPostInitCallback();
   }

   pthread_key_t PThreadKey;
diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -12,8 +12,6 @@
 #include "linux.h" // for getAndroidTlsPtr()
 #include "tsd.h"

-#include <pthread.h>
-
 namespace scudo {

 template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
@@ -73,13 +71,15 @@
   }

   void disable() {
+    Mutex.lock();
     for (u32 I = 0; I < NumberOfTSDs; I++)
       TSDs[I].lock();
   }

   void enable() {
-    for (u32 I = 0; I < NumberOfTSDs; I++)
+    for (s32 I = NumberOfTSDs - 1; I >= 0; I--)
       TSDs[I].unlock();
+    Mutex.unlock();
   }

 private:
@@ -117,6 +117,7 @@
     // Initial context assignment is done in a plain round-robin fashion.
     const u32 Index = atomic_fetch_add(&CurrentIndex, 1U, memory_order_relaxed);
     setCurrentTSD(&TSDs[Index % NumberOfTSDs]);
+    Instance->callPostInitCallback();
   }

   NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD) {
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp
@@ -18,22 +18,23 @@
 #include <stdint.h>
 #include <stdio.h>

-static scudo::Allocator<scudo::Config> Allocator;
+#define SCUDO_PREFIX(name) name
+#define SCUDO_ALLOCATOR Allocator
+
+extern "C" void SCUDO_PREFIX(malloc_postinit)();
+static scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)>
+    SCUDO_ALLOCATOR;
 // Pointer to the static allocator so that the C++ wrappers can access it.
 // Technically we could have a completely separated heap for C & C++ but in
 // reality the amount of cross pollination between the two is staggering.
-scudo::Allocator<scudo::Config> *AllocatorPtr = &Allocator;
-
-extern "C" {
+scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> *
+    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;

-#define SCUDO_PREFIX(name) name
-#define SCUDO_ALLOCATOR Allocator
 #include "wrappers_c.inc"
+
 #undef SCUDO_ALLOCATOR
 #undef SCUDO_PREFIX

-INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
-
-} // extern "C"
+extern "C" INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }

 #endif // !SCUDO_ANDROID || !_BIONIC
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -17,6 +17,8 @@
 #define SCUDO_MALLOC_ALIGNMENT FIRST_32_SECOND_64(8U, 16U)
 #endif

+extern "C" {
+
 INTERFACE WEAK void *SCUDO_PREFIX(calloc)(size_t nmemb, size_t size) {
   scudo::uptr Product;
   if (UNLIKELY(scudo::checkForCallocOverflow(size, nmemb, &Product))) {
@@ -141,11 +143,16 @@
   return 0;
 }

+INTERFACE WEAK void SCUDO_PREFIX(malloc_enable)() { SCUDO_ALLOCATOR.enable(); }
+
 INTERFACE WEAK void SCUDO_PREFIX(malloc_disable)() {
   SCUDO_ALLOCATOR.disable();
 }

-INTERFACE WEAK void SCUDO_PREFIX(malloc_enable)() { SCUDO_ALLOCATOR.enable(); }
+void SCUDO_PREFIX(malloc_postinit)() {
+  pthread_atfork(SCUDO_PREFIX(malloc_disable), SCUDO_PREFIX(malloc_enable),
+                 SCUDO_PREFIX(malloc_enable));
+}

 INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, UNUSED int value) {
   if (param == M_DECAY_TIME) {
@@ -176,3 +183,5 @@
   fputs("</malloc>", stream);
   return 0;
 }
+
+} // extern "C"
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
--- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
@@ -18,22 +18,40 @@
 #include <stdint.h>
 #include <stdio.h>

-static scudo::Allocator<scudo::AndroidConfig> Allocator;
-static scudo::Allocator<scudo::AndroidSvelteConfig> SvelteAllocator;
-
-extern "C" {
-
 // Regular MallocDispatch definitions.
 #define SCUDO_PREFIX(name) CONCATENATE(scudo_, name)
 #define SCUDO_ALLOCATOR Allocator
+
+extern "C" void SCUDO_PREFIX(malloc_postinit)();
+static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)>
+    SCUDO_ALLOCATOR;
+// Pointer to the static allocator so that the C++ wrappers can access it.
+// Technically we could have a completely separated heap for C & C++ but in
+// reality the amount of cross pollination between the two is staggering.
+scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> *
+    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
+
 #include "wrappers_c.inc"
+
 #undef SCUDO_ALLOCATOR
 #undef SCUDO_PREFIX

 // Svelte MallocDispatch definitions.
 #define SCUDO_PREFIX(name) CONCATENATE(scudo_svelte_, name)
 #define SCUDO_ALLOCATOR SvelteAllocator
+
+extern "C" void SCUDO_PREFIX(malloc_postinit)();
+static scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)>
+    SCUDO_ALLOCATOR;
+// Pointer to the static allocator so that the C++ wrappers can access it.
+// Technically we could have a completely separated heap for C & C++ but in
+// reality the amount of cross pollination between the two is staggering.
+scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)> *
+    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
+
 #include "wrappers_c.inc"
+
 #undef SCUDO_ALLOCATOR
 #undef SCUDO_PREFIX

@@ -44,6 +62,4 @@
   SvelteAllocator.printStats();
 }

-} // extern "C"
-
 #endif // SCUDO_ANDROID && _BIONIC
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp b/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp
--- a/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp
@@ -15,7 +15,8 @@

 #include <stdint.h>

-extern scudo::Allocator<scudo::Config> *AllocatorPtr;
+extern "C" void malloc_postinit();
+extern scudo::Allocator<scudo::Config, malloc_postinit> *AllocatorPtr;

 namespace std {
 struct nothrow_t {};
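
Note on the fork-safety pattern the patch wires together (a minimal standalone sketch under stated assumptions, not Scudo's actual code): malloc_postinit(), run exactly once via pthread_once() after the first thread finishes TSD initialization, registers the allocator-wide malloc_disable()/malloc_enable() pair with pthread_atfork(). The prepare handler therefore acquires every allocator lock before fork(), and the parent and child handlers release them in reverse order afterwards, so the child never inherits a heap lock held by a thread that no longer exists. The demo* names and the three std::mutex stand-ins below are illustrative only.

#include <pthread.h>
#include <mutex>

// Illustrative stand-ins for the allocator-internal locks. The real patch
// locks, in order: TSD registry, Stats, Quarantine, Primary, then Secondary.
static std::mutex RegistryMutex, StatsMutex, QuarantineMutex;

// pthread_atfork "prepare" handler: grab every lock before fork().
static void demoMallocDisable() {
  RegistryMutex.lock();
  StatsMutex.lock();
  QuarantineMutex.lock();
}

// pthread_atfork "parent"/"child" handler: release in reverse order.
static void demoMallocEnable() {
  QuarantineMutex.unlock();
  StatsMutex.unlock();
  RegistryMutex.unlock();
}

// Counterpart of SCUDO_PREFIX(malloc_postinit): register the handlers once.
static void demoMallocPostInit() {
  pthread_atfork(demoMallocDisable, demoMallocEnable, demoMallocEnable);
}

// Counterpart of Allocator::callPostInitCallback(), invoked from the TSD
// registries' initOnceMaybe() so registration happens after full init.
static void demoCallPostInitCallback() {
  static pthread_once_t OnceControl = PTHREAD_ONCE_INIT;
  pthread_once(&OnceControl, demoMallocPostInit);
}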