diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
--- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
@@ -10,7 +10,9 @@
 list(APPEND SCUDO_CFLAGS
   -Werror=conversion
   -Wall
-  -nostdinc++)
+  -nostdinc++
+  -O0
+  -g)
 
 # Remove -stdlib= which is unused when passing -nostdinc++.
 string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
@@ -107,8 +109,8 @@
 set(SCUDO_OBJECT_LIBS)
 
 if (COMPILER_RT_HAS_GWP_ASAN)
-  list(APPEND SCUDO_OBJECT_LIBS RTGwpAsan)
-  list(APPEND SCUDO_CFLAGS -DGWP_ASAN_HOOKS)
+  #list(APPEND SCUDO_OBJECT_LIBS RTGwpAsan)
+  #list(APPEND SCUDO_CFLAGS -DGWP_ASAN_HOOKS)
 endif()
 
 if(COMPILER_RT_HAS_SCUDO_STANDALONE)
diff --git a/compiler-rt/lib/scudo/standalone/bytemap.h b/compiler-rt/lib/scudo/standalone/bytemap.h
--- a/compiler-rt/lib/scudo/standalone/bytemap.h
+++ b/compiler-rt/lib/scudo/standalone/bytemap.h
@@ -34,6 +34,9 @@
     return Map[Index];
   }
 
+  void disable() {}
+  void enable() {}
+
 private:
   u8 *Map;
 };
@@ -82,6 +85,9 @@
     return Level2Map[Index % Level2Size];
   }
 
+  NOINLINE void disable() { Mutex.lock(); }
+  NOINLINE void enable() { Mutex.unlock(); }
+
 private:
   u8 *get(uptr Index) const {
     DCHECK_LT(Index, Level1Size);
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -31,15 +31,23 @@
 static gwp_asan::GuardedPoolAllocator GuardedAlloc;
 #endif // GWP_ASAN_HOOKS
 
+extern "C" inline void EmptyCallback() {}
+
 namespace scudo {
 
-template <class Params> class Allocator {
+template <class Params, void (*PostInitCallback)(void) = EmptyCallback>
+class Allocator {
 public:
   using PrimaryT = typename Params::Primary;
   using CacheT = typename PrimaryT::CacheT;
-  typedef Allocator<Params> ThisT;
+  typedef Allocator<Params, PostInitCallback> ThisT;
   typedef typename Params::template TSDRegistryT<ThisT> TSDRegistryT;
 
+  void callPostInitCallback() {
+    static pthread_once_t OnceControl = PTHREAD_ONCE_INIT;
+    pthread_once(&OnceControl, PostInitCallback);
+  }
+
   struct QuarantineCallback {
     explicit QuarantineCallback(ThisT &Instance, CacheT &LocalCache)
         : Allocator(Instance), Cache(LocalCache) {}
@@ -417,15 +425,21 @@
   // TODO(kostyak): disable() is currently best-effort. There are some small
   //                windows of time when an allocation could still succeed after
   //                this function finishes. We will revisit that later.
-  void disable() {
+  NOINLINE void disable() {
     initThreadMaybe();
     TSDRegistry.disable();
+    Stats.disable();
+    Quarantine.disable();
+    Primary.disable();
     Secondary.disable();
   }
 
-  void enable() {
+  NOINLINE void enable() {
     initThreadMaybe();
     Secondary.enable();
+    Primary.enable();
+    Quarantine.enable();
+    Stats.enable();
     TSDRegistry.enable();
   }
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -122,14 +122,27 @@
       releaseToOSMaybe(Sci, ClassId);
   }
 
-  void disable() {
-    for (uptr I = 0; I < NumClasses; I++)
-      getSizeClassInfo(I)->Mutex.lock();
+  NOINLINE void disable() {
+    // The BatchClassId must be locked last since other classes can use it.
+    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
+      if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
+        continue;
+      getSizeClassInfo(static_cast<uptr>(I))->Mutex.lock();
+    }
+    getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.lock();
+    RegionsStashMutex.lock();
+    PossibleRegions.disable();
   }
 
-  void enable() {
-    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--)
-      getSizeClassInfo(static_cast<uptr>(I))->Mutex.unlock();
+  NOINLINE void enable() {
+    PossibleRegions.enable();
+    RegionsStashMutex.unlock();
+    getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
+    for (uptr I = 0; I < NumClasses; I++) {
+      if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
+        continue;
+      getSizeClassInfo(I)->Mutex.unlock();
+    }
   }
 
   template <typename F> void iterateOverBlocks(F Callback) {
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -124,14 +124,23 @@
       releaseToOSMaybe(Region, ClassId);
   }
 
-  void disable() {
-    for (uptr I = 0; I < NumClasses; I++)
-      getRegionInfo(I)->Mutex.lock();
+  NOINLINE void disable() {
+    // The BatchClassId must be locked last since other classes can use it.
+    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
+      if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
+        continue;
+      getRegionInfo(static_cast<uptr>(I))->Mutex.lock();
+    }
+    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock();
   }
 
-  void enable() {
-    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--)
-      getRegionInfo(static_cast<uptr>(I))->Mutex.unlock();
+  NOINLINE void enable() {
+    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
+    for (uptr I = 0; I < NumClasses; I++) {
+      if (I == SizeClassMap::BatchClassId)
+        continue;
+      getRegionInfo(I)->Mutex.unlock();
+    }
   }
 
   template <typename F> void iterateOverBlocks(F Callback) const {
diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h
--- a/compiler-rt/lib/scudo/standalone/quarantine.h
+++ b/compiler-rt/lib/scudo/standalone/quarantine.h
@@ -205,7 +205,7 @@
       ScopedLock L(CacheMutex);
       Cache.transfer(C);
     }
-    if (Cache.getSize() > getMaxSize() && RecyleMutex.tryLock())
+    if (Cache.getSize() > getMaxSize() && RecycleMutex.tryLock())
       recycle(atomic_load_relaxed(&MinSize), Cb);
   }
 
@@ -214,7 +214,7 @@
       ScopedLock L(CacheMutex);
       Cache.transfer(C);
     }
-    RecyleMutex.lock();
+    RecycleMutex.lock();
     recycle(0, Cb);
   }
 
@@ -225,11 +225,22 @@
                 getMaxSize() >> 10, getCacheSize() >> 10);
   }
 
+  NOINLINE void disable() {
+    // RecycleMutex must be locked 1st since we grab CacheMutex within recycle.
+    RecycleMutex.lock();
+    CacheMutex.lock();
+  }
+
+  NOINLINE void enable() {
+    CacheMutex.unlock();
+    RecycleMutex.unlock();
+  }
+
 private:
   // Read-only data.
   alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex;
   CacheT Cache;
-  alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecyleMutex;
+  alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex;
   atomic_uptr MinSize;
   atomic_uptr MaxSize;
   alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize;
@@ -261,7 +272,7 @@
       while (Cache.getSize() > MinSize)
         Tmp.enqueueBatch(Cache.dequeueBatch());
     }
-    RecyleMutex.unlock();
+    RecycleMutex.unlock();
     doRecycle(&Tmp, Cb);
   }
diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -79,9 +79,9 @@
 
   void getStats(ScopedString *Str) const;
 
-  void disable() { Mutex.lock(); }
+  NOINLINE void disable() { Mutex.lock(); }
 
-  void enable() { Mutex.unlock(); }
+  NOINLINE void enable() { Mutex.unlock(); }
 
   template <typename F> void iterateOverBlocks(F Callback) const {
     for (const auto &H : InUseBlocks)
diff --git a/compiler-rt/lib/scudo/standalone/stats.h b/compiler-rt/lib/scudo/standalone/stats.h
--- a/compiler-rt/lib/scudo/standalone/stats.h
+++ b/compiler-rt/lib/scudo/standalone/stats.h
@@ -87,6 +87,9 @@
       S[I] = static_cast<sptr>(S[I]) >= 0 ? S[I] : 0;
   }
 
+  NOINLINE void disable() { Mutex.lock(); }
+  NOINLINE void enable() { Mutex.unlock(); }
+
 private:
   mutable HybridMutex Mutex;
   DoublyLinkedList<LocalStats> StatsList;
diff --git a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
--- a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
@@ -21,7 +21,7 @@
 endif()
 
 if (COMPILER_RT_HAS_GWP_ASAN)
-  list(APPEND SCUDO_UNITTEST_CFLAGS -DGWP_ASAN_HOOKS)
+  #list(APPEND SCUDO_UNITTEST_CFLAGS -DGWP_ASAN_HOOKS)
 endif()
 
 set(SCUDO_TEST_ARCH ${SCUDO_STANDALONE_SUPPORTED_ARCH})
@@ -43,7 +43,7 @@
 macro(add_scudo_unittest testname)
   cmake_parse_arguments(TEST "" "" "SOURCES;ADDITIONAL_RTOBJECTS" ${ARGN})
   if (COMPILER_RT_HAS_GWP_ASAN)
-    list(APPEND TEST_ADDITIONAL_RTOBJECTS RTGwpAsan)
+    #list(APPEND TEST_ADDITIONAL_RTOBJECTS RTGwpAsan)
   endif()
 
   if(COMPILER_RT_HAS_SCUDO_STANDALONE)
diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
@@ -36,6 +36,7 @@
   void initCache(CacheT *Cache) { memset(Cache, 0, sizeof(*Cache)); }
   void commitBack(scudo::TSD<MockAllocator> *TSD) {}
   TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
+  void callPostInitCallback() {}
 
   bool isInitialized() { return Initialized; }
diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp
@@ -310,4 +310,32 @@
   fclose(F);
   EXPECT_EQ(strncmp(Buffer, "<malloc version=\"scudo-", 23), 0);
 }
diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
--- a/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp
@@ -113,3 +114,55 @@
   for (auto &T : Threads)
     T.join();
 }
+
+#if !SCUDO_FUCHSIA
+TEST(ScudoWrappersCppTest, AllocAfterFork) {
+  std::atomic_bool Stop;
+
+  // Create threads that simply allocate and free different sizes.
+  std::vector<std::thread *> Threads;
+  for (size_t N = 0; N < 5; N++) {
+    std::thread *T = new std::thread([&Stop] {
+      while (!Stop) {
+        for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
+          char *P = new char[1UL << SizeLog];
+          EXPECT_NE(P, nullptr);
+          // Make sure this value is not optimized away.
+          asm volatile("" : : "r,m"(P) : "memory");
+          delete[] P;
+        }
+      }
+    });
+    Threads.push_back(T);
+  }
+
+  // Fork repeatedly and allocate in each child.
+  for (size_t N = 0; N < 100; N++) {
+    pid_t Pid;
+    if ((Pid = fork()) == 0) {
+      for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
+        char *P = new char[1UL << SizeLog];
+        EXPECT_NE(P, nullptr);
+        // Make sure this value is not optimized away.
+        asm volatile("" : : "r,m"(P) : "memory");
+        // Make sure we can touch all of the allocation.
+        memset(P, 0x32, 1U << SizeLog);
+        // EXPECT_LE(1U << SizeLog, malloc_usable_size(ptr));
+        delete[] P;
+      }
+      _exit(10);
+    }
+    EXPECT_NE(-1, Pid);
+    int Status;
+    EXPECT_EQ(Pid, waitpid(Pid, &Status, 0));
+    EXPECT_FALSE(WIFSIGNALED(Status));
+    EXPECT_EQ(10, WEXITSTATUS(Status));
+  }
+
+  printf("Waiting for threads to complete\n");
+  Stop = true;
+  for (auto Thread : Threads)
+    Thread->join();
+  Threads.clear();
+}
+#endif
\ No newline at end of file
diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h
--- a/compiler-rt/lib/scudo/standalone/tsd.h
+++ b/compiler-rt/lib/scudo/standalone/tsd.h
@@ -14,6 +14,7 @@
 #include "mutex.h"
 
 #include <limits.h> // for PTHREAD_DESTRUCTOR_ITERATIONS
+#include <pthread.h>
 
 // With some build setups, this might still not be defined.
 #ifndef PTHREAD_DESTRUCTOR_ITERATIONS
diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -11,8 +11,6 @@
 
 #include "tsd.h"
 
-#include <pthread.h>
-
 namespace scudo {
 
 enum class ThreadState : u8 {
@@ -61,14 +59,16 @@
 
   // To disable the exclusive TSD registry, we effectively lock the fallback TSD
   // and force all threads to attempt to use it instead of their local one.
-  void disable() {
+  NOINLINE void disable() {
+    Mutex.lock();
     FallbackTSD->lock();
     atomic_store(&Disabled, 1U, memory_order_release);
   }
 
-  void enable() {
+  NOINLINE void enable() {
     atomic_store(&Disabled, 0U, memory_order_release);
     FallbackTSD->unlock();
+    Mutex.unlock();
   }
 
 private:
@@ -90,6 +90,7 @@
         pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0);
     ThreadTSD.initLinkerInitialized(Instance);
     State = ThreadState::Initialized;
+    Instance->callPostInitCallback();
   }
 
   pthread_key_t PThreadKey;
diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -12,8 +12,6 @@
 #include "linux.h" // for getAndroidTlsPtr()
 #include "tsd.h"
 
-#include <pthread.h>
-
 namespace scudo {
 
 template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
@@ -72,14 +70,16 @@
     return getTSDAndLockSlow(TSD);
   }
 
-  void disable() {
+  NOINLINE void disable() {
+    Mutex.lock();
     for (u32 I = 0; I < NumberOfTSDs; I++)
       TSDs[I].lock();
   }
 
-  void enable() {
+  NOINLINE void enable() {
     for (u32 I = 0; I < NumberOfTSDs; I++)
       TSDs[I].unlock();
+    Mutex.unlock();
   }
 
 private:
@@ -117,6 +117,7 @@
     // Initial context assignment is done in a plain round-robin fashion.
     const u32 Index = atomic_fetch_add(&CurrentIndex, 1U, memory_order_relaxed);
     setCurrentTSD(&TSDs[Index % NumberOfTSDs]);
+    Instance->callPostInitCallback();
   }
 
   NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD) {
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.cpp
@@ -18,22 +18,23 @@
 #include <stdint.h>
 #include <stdio.h>
 
-static scudo::Allocator<scudo::Config> Allocator;
+#define SCUDO_PREFIX(name) name
+#define SCUDO_ALLOCATOR Allocator
+
+extern "C" void SCUDO_PREFIX(malloc_postinit)();
+static scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)>
+    SCUDO_ALLOCATOR;
 // Pointer to the static allocator so that the C++ wrappers can access it.
 // Technically we could have a completely separated heap for C & C++ but in
 // reality the amount of cross pollination between the two is staggering.
-scudo::Allocator<scudo::Config> *AllocatorPtr = &Allocator;
-
-extern "C" {
+scudo::Allocator<scudo::Config, SCUDO_PREFIX(malloc_postinit)> *
+    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
 
-#define SCUDO_PREFIX(name) name
-#define SCUDO_ALLOCATOR Allocator
 #include "wrappers_c.inc"
+
 #undef SCUDO_ALLOCATOR
 #undef SCUDO_PREFIX
 
-INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
-
-} // extern "C"
+extern "C" INTERFACE void __scudo_print_stats(void) { Allocator.printStats(); }
 
 #endif // !SCUDO_ANDROID || !_BIONIC
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -17,6 +17,8 @@
 #define SCUDO_MALLOC_ALIGNMENT FIRST_32_SECOND_64(8U, 16U)
 #endif
 
+extern "C" {
+
 INTERFACE WEAK void *SCUDO_PREFIX(calloc)(size_t nmemb, size_t size) {
   scudo::uptr Product;
   if (UNLIKELY(scudo::checkForCallocOverflow(size, nmemb, &Product))) {
@@ -141,11 +143,31 @@
   return 0;
 }
 
+static pthread_mutex_t SCUDO_PREFIX(Mutex) = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t SCUDO_PREFIX(Conditional) = PTHREAD_COND_INITIALIZER;
+static bool SCUDO_PREFIX(Disabled) = false;
+
+INTERFACE WEAK void SCUDO_PREFIX(malloc_enable)() {
+  pthread_mutex_lock(&SCUDO_PREFIX(Mutex));
+  SCUDO_ALLOCATOR.enable();
+  SCUDO_PREFIX(Disabled) = false;
+  pthread_cond_broadcast(&SCUDO_PREFIX(Conditional));
+  pthread_mutex_unlock(&SCUDO_PREFIX(Mutex));
+}
+
 INTERFACE WEAK void SCUDO_PREFIX(malloc_disable)() {
+  pthread_mutex_lock(&SCUDO_PREFIX(Mutex));
+  while (SCUDO_PREFIX(Disabled))
+    pthread_cond_wait(&SCUDO_PREFIX(Conditional), &SCUDO_PREFIX(Mutex));
   SCUDO_ALLOCATOR.disable();
+  SCUDO_PREFIX(Disabled) = true;
+  pthread_mutex_unlock(&SCUDO_PREFIX(Mutex));
 }
 
-INTERFACE WEAK void SCUDO_PREFIX(malloc_enable)() { SCUDO_ALLOCATOR.enable(); }
+void SCUDO_PREFIX(malloc_postinit)() {
+  pthread_atfork(SCUDO_PREFIX(malloc_disable), SCUDO_PREFIX(malloc_enable),
+                 SCUDO_PREFIX(malloc_enable));
+}
 
 INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, UNUSED int value) {
   if (param == M_DECAY_TIME) {
@@ -176,3 +198,5 @@
   fputs("<malloc version=\"scudo-1\"></malloc>", stream);
   return 0;
 }
+
+} // extern "C"
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
--- a/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp
@@ -18,22 +18,40 @@
 #include <stdint.h>
 #include <stdio.h>
 
-static scudo::Allocator<scudo::AndroidConfig> Allocator;
-static scudo::Allocator<scudo::AndroidSvelteConfig> SvelteAllocator;
-
-extern "C" {
-
 // Regular MallocDispatch definitions.
 #define SCUDO_PREFIX(name) CONCATENATE(scudo_, name)
 #define SCUDO_ALLOCATOR Allocator
+
+extern "C" void SCUDO_PREFIX(malloc_postinit)();
+static scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)>
+    SCUDO_ALLOCATOR;
+// Pointer to the static allocator so that the C++ wrappers can access it.
+// Technically we could have a completely separated heap for C & C++ but in
+// reality the amount of cross pollination between the two is staggering.
+scudo::Allocator<scudo::AndroidConfig, SCUDO_PREFIX(malloc_postinit)> *
+    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
+
 #include "wrappers_c.inc"
+
 #undef SCUDO_ALLOCATOR
 #undef SCUDO_PREFIX
 
 // Svelte MallocDispatch definitions.
 #define SCUDO_PREFIX(name) CONCATENATE(scudo_svelte_, name)
 #define SCUDO_ALLOCATOR SvelteAllocator
+
+extern "C" void SCUDO_PREFIX(malloc_postinit)();
+static scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)>
+    SCUDO_ALLOCATOR;
+// Pointer to the static allocator so that the C++ wrappers can access it.
+// Technically we could have a completely separated heap for C & C++ but in
+// reality the amount of cross pollination between the two is staggering.
+scudo::Allocator<scudo::AndroidSvelteConfig, SCUDO_PREFIX(malloc_postinit)> *
+    CONCATENATE(SCUDO_ALLOCATOR, Ptr) = &SCUDO_ALLOCATOR;
+
 #include "wrappers_c.inc"
+
 #undef SCUDO_ALLOCATOR
 #undef SCUDO_PREFIX
 
@@ -44,6 +62,4 @@
   SvelteAllocator.printStats();
 }
 
-} // extern "C"
-
 #endif // SCUDO_ANDROID && _BIONIC
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp b/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp
--- a/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp
+++ b/compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp
@@ -15,7 +15,8 @@
 
 #include <stdint.h>
 
-extern scudo::Allocator<scudo::Config> *AllocatorPtr;
+extern "C" INTERFACE void malloc_postinit();
+extern scudo::Allocator<scudo::Config, malloc_postinit> *AllocatorPtr;
 
 namespace std {
 struct nothrow_t {};
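Note on the fork path this patch wires up: once a thread initializes the allocator, callPostInitCallback() runs malloc_postinit() exactly once, which registers malloc_disable() as the pthread_atfork() prepare handler and malloc_enable() as both the parent and child handlers. The sketch below is illustrative only and is not part of the patch; it assumes the default, unprefixed wrappers (SCUDO_PREFIX(name) == name) and a Linux-like target, and shows how a consumer observes the new behavior.

// Illustrative sketch, not part of the patch. Assumes plain malloc_disable()
// and malloc_enable() symbols as defined by wrappers_c.inc above.
#include <sys/wait.h>
#include <unistd.h>

#include <cstdlib>
#include <cstring>

extern "C" void malloc_disable(); // defined in wrappers_c.inc
extern "C" void malloc_enable();  // defined in wrappers_c.inc

int main() {
  // Warm up the allocator so malloc_postinit() has registered the
  // pthread_atfork(malloc_disable, malloc_enable, malloc_enable) handlers.
  free(malloc(16));

  // A plain fork() is now safe: every allocator mutex is acquired in the
  // prepare handler and released again in both the parent and the child.
  const pid_t Pid = fork();
  if (Pid == 0) {
    void *P = malloc(128); // must not deadlock in the child
    memset(P, 0x42, 128);
    free(P);
    _exit(0);
  }
  waitpid(Pid, nullptr, 0);

  // Fork-like operations that bypass pthread_atfork() can still bracket the
  // critical window manually with the same pair of calls.
  malloc_disable();
  // ... clone()/vfork()-style work while the heap is locked ...
  malloc_enable();
  return 0;
}

The Disabled flag and condition variable added to malloc_disable()/malloc_enable() serialize overlapping disable requests, so a fork racing with another caller of malloc_disable() waits for the matching malloc_enable() instead of double-locking the allocator.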