Index: lib/scudo/scudo_allocator.cpp
===================================================================
--- lib/scudo/scudo_allocator.cpp
+++ lib/scudo/scudo_allocator.cpp
@@ -495,7 +495,13 @@
   // Deallocates a Chunk, which means adding it to the delayed free list (or
   // Quarantine).
   void deallocate(void *UserPtr, uptr DeleteSize, AllocType Type) {
-    initThreadMaybe();
+    // For a deallocation, we only ensure minimal initialization, meaning thread
+    // local data will be left uninitialized for now (when using ELF TLS). The
+    // fallback cache will be used instead. This is a workaround for a situation
+    // where the only heap operation performed in a thread would be a free past
+    // the TLS destructors, ending up in initialized thread specific data never
+    // being destroyed properly. Any other heap operation will do a full init.
+    initThreadMaybe(/*MinimalInit=*/true);
     // if (&__sanitizer_free_hook) __sanitizer_free_hook(UserPtr);
     if (UNLIKELY(!UserPtr))
       return;
Index: lib/scudo/scudo_tls.h
===================================================================
--- lib/scudo/scudo_tls.h
+++ lib/scudo/scudo_tls.h
@@ -36,7 +36,7 @@
   void commitBack();
 };
 
-void initThread();
+void initThread(bool MinimalInit);
 
 // Platform specific fastpath functions definitions.
 #include "scudo_tls_android.inc"
Index: lib/scudo/scudo_tls_android.cpp
===================================================================
--- lib/scudo/scudo_tls_android.cpp
+++ lib/scudo/scudo_tls_android.cpp
@@ -49,7 +49,7 @@
     ThreadContexts[i].init();
 }
 
-void initThread() {
+void initThread(bool MinimalInit) {
   pthread_once(&GlobalInitialized, initOnce);
   // Initial context assignment is done in a plain round-robin fashion.
   u32 Index = atomic_fetch_add(&ThreadContextCurrentIndex, 1,
Index: lib/scudo/scudo_tls_android.inc
===================================================================
--- lib/scudo/scudo_tls_android.inc
+++ lib/scudo/scudo_tls_android.inc
@@ -20,10 +20,10 @@
 
 #if SANITIZER_LINUX && SANITIZER_ANDROID
 
-ALWAYS_INLINE void initThreadMaybe() {
+ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
   if (LIKELY(*get_android_tls_ptr()))
     return;
-  initThread();
+  initThread(MinimalInit);
 }
 
 ScudoThreadContext *getThreadContextAndLockSlow();
Index: lib/scudo/scudo_tls_linux.cpp
===================================================================
--- lib/scudo/scudo_tls_linux.cpp
+++ lib/scudo/scudo_tls_linux.cpp
@@ -53,8 +53,10 @@
   initScudo();
 }
 
-void initThread() {
+void initThread(bool MinimalInit) {
   CHECK_EQ(pthread_once(&GlobalInitialized, initOnce), 0);
+  if (UNLIKELY(MinimalInit))
+    return;
   CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast<void *>(
       GetPthreadDestructorIterations())), 0);
   ThreadLocalContext.init();
Index: lib/scudo/scudo_tls_linux.inc
===================================================================
--- lib/scudo/scudo_tls_linux.inc
+++ lib/scudo/scudo_tls_linux.inc
@@ -31,14 +31,14 @@
 __attribute__((tls_model("initial-exec")))
 extern THREADLOCAL ScudoThreadContext ThreadLocalContext;
 
-ALWAYS_INLINE void initThreadMaybe() {
+ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
   if (LIKELY(ScudoThreadState != ThreadNotInitialized))
     return;
-  initThread();
+  initThread(MinimalInit);
 }
 
 ALWAYS_INLINE ScudoThreadContext *getThreadContextAndLock() {
-  if (UNLIKELY(ScudoThreadState == ThreadTornDown))
+  if (UNLIKELY(ScudoThreadState != ThreadInitialized))
     return nullptr;
   return &ThreadLocalContext;
 }
Index: test/scudo/tsd_destruction.cpp
===================================================================
--- /dev/null
+++ test/scudo/tsd_destruction.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_scudo %s -o %t
+// RUN: %run %t 2>&1
+
+#include <locale.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Some of glibc's own thread local data is destroyed after a user's thread
+// local destructors are called, via __libc_thread_freeres. This might involve
+// calling free, as is the case for strerror_thread_freeres.
+// If there is no prior heap operation in the thread, this free would end up
+// initializing some thread specific data that would never be destroyed
+// properly, while still being deallocated when the TLS goes away. As a result,
+// a program could SEGV, usually in
+// __sanitizer::AllocatorGlobalStats::Unregister, where one of the doubly
+// linked list links would refer to a now unmapped memory area.
+
+// This test reproduces those circumstances. Success means executing without
+// a segmentation fault.
+
+const int kNumThreads = 16;
+pthread_t tid[kNumThreads];
+
+void *thread_func(void *arg) {
+  uintptr_t i = (uintptr_t)arg;
+  if ((i & 1) == 0) free(malloc(16));
+  // Calling strerror_l allows for strerror_thread_freeres to be called.
+  strerror_l(0, LC_GLOBAL_LOCALE);
+  return 0;
+}
+
+int main(int argc, char** argv) {
+  for (uintptr_t j = 0; j < 8; j++) {
+    for (uintptr_t i = 0; i < kNumThreads; i++)
+      pthread_create(&tid[i], 0, thread_func, (void *)i);
+    for (uintptr_t i = 0; i < kNumThreads; i++)
+      pthread_join(tid[i], 0);
+  }
+  return 0;
+}
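
Note: the core of this change is that a free() arriving as a thread's only heap operation, possibly after its TLS destructors have already run, must not fully initialize thread specific data that would then never be torn down. Below is a minimal standalone C++ sketch of the mechanism the patch relies on; this is not Scudo's actual implementation, the bodies and cache variables are illustrative stand-ins, and only the function names and the state check mirror the patch. With MinimalInit, thread state never leaves ThreadNotInitialized, so getThreadContextAndLock() returns nullptr and the operation is routed through a mutex-guarded global fallback cache instead of per-thread data.

#include <cstdio>
#include <mutex>
#include <thread>

enum ThreadState { ThreadNotInitialized, ThreadInitialized, ThreadTornDown };
thread_local ThreadState State = ThreadNotInitialized;
thread_local int LocalCache = 0;  // stand-in for the per-thread allocator cache
int FallbackCache = 0;            // stand-in for the global fallback cache
std::mutex FallbackMutex;

// With MinimalInit, any one-time global setup would still run, but the
// thread-local context is left untouched and no TLS destructor is registered,
// mirroring the early return added to scudo_tls_linux.cpp.
void initThread(bool MinimalInit) {
  if (MinimalInit)
    return;
  State = ThreadInitialized;
}

// Mirrors the patched check in scudo_tls_linux.inc: any state other than
// ThreadInitialized (never initialized, or already torn down) yields nullptr,
// forcing the caller onto the fallback path.
int *getThreadContextAndLock() {
  if (State != ThreadInitialized)
    return nullptr;
  return &LocalCache;
}

// Sketch of a deallocation: minimal init first, then the per-thread fast path
// if a context exists, otherwise the locked global fallback.
void deallocateSketch() {
  initThread(/*MinimalInit=*/true);
  if (int *Cache = getThreadContextAndLock()) {
    ++*Cache;
  } else {
    std::lock_guard<std::mutex> Lock(FallbackMutex);
    ++FallbackCache;
  }
}

int main() {
  std::thread T([] { deallocateSketch(); });
  T.join();
  std::printf("fallback cache used %d time(s)\n", FallbackCache);
  return 0;
}

Because the thread's only heap operation takes the minimal path, nothing is registered that would need destruction at thread exit; in the sketch above the spawned thread lands on the fallback cache exactly once. That is the property that keeps __libc_thread_freeres's late free() from resurrecting thread specific data in the real allocator.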