Index: lib/scudo/CMakeLists.txt =================================================================== --- lib/scudo/CMakeLists.txt +++ lib/scudo/CMakeLists.txt @@ -14,6 +14,7 @@ scudo_interceptors.cpp scudo_new_delete.cpp scudo_termination.cpp + scudo_tls_android.cpp scudo_tls_linux.cpp scudo_utils.cpp) Index: lib/scudo/scudo_allocator.h =================================================================== --- lib/scudo/scudo_allocator.h +++ lib/scudo/scudo_allocator.h @@ -72,7 +72,13 @@ #if SANITIZER_CAN_USE_ALLOCATOR64 const uptr AllocatorSpace = ~0ULL; -const uptr AllocatorSize = 0x40000000000ULL; // 4TB. +# if defined(__aarch64__) && SANITIZER_ANDROID +const uptr AllocatorSize = 0x4000000000ULL; // 256G. +# elif defined(__aarch64__) +const uptr AllocatorSize = 0x10000000000ULL; // 1T. +# else +const uptr AllocatorSize = 0x40000000000ULL; // 4T. +# endif typedef DefaultSizeClassMap SizeClassMap; struct AP { static const uptr kSpaceBeg = AllocatorSpace; Index: lib/scudo/scudo_allocator.cpp =================================================================== --- lib/scudo/scudo_allocator.cpp +++ lib/scudo/scudo_allocator.cpp @@ -368,11 +368,12 @@ void *Ptr; uptr Salt; uptr AllocationAlignment = FromPrimary ? MinAlignment : Alignment; - ScudoThreadContext *ThreadContext = getThreadContext(); + ScudoThreadContext *ThreadContext = getThreadContextAndLock(); if (LIKELY(ThreadContext)) { Salt = getPrng(ThreadContext)->getNext(); Ptr = BackendAllocator.Allocate(getAllocatorCache(ThreadContext), NeededSize, AllocationAlignment); + ThreadContext->unlock(); } else { SpinMutexLock l(&FallbackMutex); Salt = FallbackPrng.getNext(); @@ -434,9 +435,10 @@ if (BypassQuarantine) { Chunk->eraseHeader(); void *Ptr = Chunk->getAllocBeg(Header); - ScudoThreadContext *ThreadContext = getThreadContext(); + ScudoThreadContext *ThreadContext = getThreadContextAndLock(); if (LIKELY(ThreadContext)) { getBackendAllocator().Deallocate(getAllocatorCache(ThreadContext), Ptr); + ThreadContext->unlock(); } else { SpinMutexLock Lock(&FallbackMutex); getBackendAllocator().Deallocate(&FallbackAllocatorCache, Ptr); @@ -445,12 +447,13 @@ UnpackedHeader NewHeader = *Header; NewHeader.State = ChunkQuarantine; Chunk->compareExchangeHeader(&NewHeader, Header); - ScudoThreadContext *ThreadContext = getThreadContext(); + ScudoThreadContext *ThreadContext = getThreadContextAndLock(); if (LIKELY(ThreadContext)) { AllocatorQuarantine.Put(getQuarantineCache(ThreadContext), QuarantineCallback( getAllocatorCache(ThreadContext)), Chunk, Size); + ThreadContext->unlock(); } else { SpinMutexLock l(&FallbackMutex); AllocatorQuarantine.Put(&FallbackQuarantineCache, Index: lib/scudo/scudo_tls.h =================================================================== --- lib/scudo/scudo_tls.h +++ lib/scudo/scudo_tls.h @@ -19,20 +19,29 @@ #include "scudo_allocator.h" #include "scudo_utils.h" +#include "sanitizer_common/sanitizer_linux.h" +#include "sanitizer_common/sanitizer_platform.h" + namespace __scudo { struct ALIGNED(64) ScudoThreadContext { public: + StaticSpinMutex Mutex; + atomic_uint64_t SlowLockPrecedence; AllocatorCache Cache; Xorshift128Plus Prng; uptr QuarantineCachePlaceHolder[4]; void init(); void commitBack(); + bool tryLock(); + void lock(); + void unlock(); }; void initThread(); // Fastpath functions are defined in the following platform specific headers. +#include "scudo_tls_android.h" #include "scudo_tls_linux.h" } // namespace __scudo Index: lib/scudo/scudo_tls_android.h =================================================================== --- /dev/null +++ lib/scudo/scudo_tls_android.h @@ -0,0 +1,63 @@ +//===-- scudo_tls_android.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// Scudo thread local structure fastpath functions implementation for Android. +/// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_TLS_ANDROID_H_ +#define SCUDO_TLS_ANDROID_H_ + +#ifndef SCUDO_TLS_H_ +# error "This file must be included inside scudo_tls.h." +#endif // SCUDO_TLS_H_ + +#if SANITIZER_LINUX && SANITIZER_ANDROID + +INLINE bool ScudoThreadContext::tryLock() { + if (Mutex.TryLock()) { + atomic_store_relaxed(&SlowLockPrecedence, 0); + return true; + } + if (atomic_load_relaxed(&SlowLockPrecedence) == 0) + atomic_store_relaxed(&SlowLockPrecedence, NanoTime()); + return false; +} + +INLINE void ScudoThreadContext::lock() { + Mutex.Lock(); + atomic_store_relaxed(&SlowLockPrecedence, 0); +} + +INLINE void ScudoThreadContext::unlock() { + Mutex.Unlock(); +} + +ALWAYS_INLINE void initThreadMaybe() { + if (LIKELY(*get_android_tls_ptr())) + return; + initThread(); +} + +ScudoThreadContext *getThreadContextAndLockSlow(); + +ALWAYS_INLINE ScudoThreadContext *getThreadContextAndLock() { + ScudoThreadContext *ThreadContext = + reinterpret_cast(*get_android_tls_ptr()); + CHECK(ThreadContext); + // Try to lock the currently associated context. + if (ThreadContext->tryLock()) + return ThreadContext; + // If it failed, go the slow path. + return getThreadContextAndLockSlow(); +} + +#endif // SANITIZER_LINUX && SANITIZER_ANDROID + +#endif // SCUDO_TLS_ANDROID_H_ Index: lib/scudo/scudo_tls_android.cpp =================================================================== --- /dev/null +++ lib/scudo/scudo_tls_android.cpp @@ -0,0 +1,96 @@ +//===-- scudo_tls_android.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// Scudo thread local structure implementation for Android. +/// +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_platform.h" + +#if SANITIZER_LINUX && SANITIZER_ANDROID + +#include "scudo_tls.h" + +#include + +namespace __scudo { + +static pthread_once_t GlobalInitialized = PTHREAD_ONCE_INIT; +static pthread_key_t PThreadKey; + +static atomic_uint32_t ThreadContextCurrentIndex; +static ScudoThreadContext *ThreadContexts; +static uptr NumberOfContexts; + +// sysconf(_SC_NPROCESSORS_{CONF,ONLN}) cannot be used as they allocate memory. +static uptr getNumberOfCPUs() { + cpu_set_t CPUs; + CHECK_EQ(sched_getaffinity(0, sizeof(cpu_set_t), &CPUs), 0); + return CPU_COUNT(&CPUs); +} + +static void initOnce() { + // Hack: TLS_SLOT_TSAN was introduced in N. To be able to use it on M for + // testing, we create an unused key. Since the key_data array follows the tls + // array, it basically gives us the extra entry we need. + // TODO(kostyak): remove and restrict to N and above. + CHECK_EQ(pthread_key_create(&PThreadKey, NULL), 0); + initScudo(); + NumberOfContexts = getNumberOfCPUs(); + ThreadContexts = reinterpret_cast( + MmapOrDie(sizeof(ScudoThreadContext) * NumberOfContexts, __func__)); + for (int i = 0; i < NumberOfContexts; i++) + ThreadContexts[i].init(); +} + +void initThread() { + pthread_once(&GlobalInitialized, initOnce); + // Initial context assignment is done in a plain round-robin fashion. + u32 Index = atomic_fetch_add(&ThreadContextCurrentIndex, 1, + memory_order_relaxed); + ScudoThreadContext *ThreadContext = + &ThreadContexts[Index % NumberOfContexts]; + *get_android_tls_ptr() = reinterpret_cast(ThreadContext); +} + +ScudoThreadContext *getThreadContextAndLockSlow() { + ScudoThreadContext *ThreadContext; + // Go through all the contexts and find the first unlocked one. + for (u32 i = 0; i < NumberOfContexts; i++) { + ThreadContext = &ThreadContexts[i]; + if (ThreadContext->tryLock()) { + *get_android_tls_ptr() = reinterpret_cast(ThreadContext); + return ThreadContext; + } + } + // No luck, find the one with the lowest precedence, and slow lock it. + u64 LowestPrecedence = UINT64_MAX; + for (u32 i = 0; i < NumberOfContexts; i++) { + u64 SlowLockPrecedence = + atomic_load_relaxed(&ThreadContexts[i].SlowLockPrecedence); + if (SlowLockPrecedence && SlowLockPrecedence < LowestPrecedence) { + ThreadContext = &ThreadContexts[i]; + LowestPrecedence = SlowLockPrecedence; + } + } + if (LIKELY(LowestPrecedence != UINT64_MAX)) { + ThreadContext->lock(); + *get_android_tls_ptr() = reinterpret_cast(ThreadContext); + return ThreadContext; + } + // Last resort (can this happen?), stick with the current one. + ThreadContext = + reinterpret_cast(*get_android_tls_ptr()); + ThreadContext->lock(); + return ThreadContext; +} + +} // namespace __scudo + +#endif // SANITIZER_LINUX && SANITIZER_ANDROID Index: lib/scudo/scudo_tls_linux.h =================================================================== --- lib/scudo/scudo_tls_linux.h +++ lib/scudo/scudo_tls_linux.h @@ -19,17 +19,23 @@ # error "This file must be included inside scudo_tls.h." #endif // SCUDO_TLS_H_ -#include "sanitizer_common/sanitizer_platform.h" +#if SANITIZER_LINUX && !SANITIZER_ANDROID -#if SANITIZER_LINUX +// For thread_local contexts, there is no locking. unlock() is the only function +// used within scudo_allocator.cpp, and should be optimized out by the compiler. +UNUSED INLINE bool ScudoThreadContext::tryLock() { return true; } +UNUSED INLINE void ScudoThreadContext::lock() {} +ALWAYS_INLINE void ScudoThreadContext::unlock() {} enum ThreadState : u8 { ThreadNotInitialized = 0, ThreadInitialized, ThreadTornDown, }; -extern thread_local ThreadState ScudoThreadState; -extern thread_local ScudoThreadContext ThreadLocalContext; +__attribute__((tls_model("initial-exec"))) +extern THREADLOCAL ThreadState ScudoThreadState; +__attribute__((tls_model("initial-exec"))) +extern THREADLOCAL ScudoThreadContext ThreadLocalContext; ALWAYS_INLINE void initThreadMaybe() { if (LIKELY(ScudoThreadState != ThreadNotInitialized)) @@ -37,12 +43,12 @@ initThread(); } -ALWAYS_INLINE ScudoThreadContext *getThreadContext() { +ALWAYS_INLINE ScudoThreadContext *getThreadContextAndLock() { if (UNLIKELY(ScudoThreadState == ThreadTornDown)) return nullptr; return &ThreadLocalContext; } -#endif // SANITIZER_LINUX +#endif // SANITIZER_LINUX && !SANITIZER_ANDROID #endif // SCUDO_TLS_LINUX_H_ Index: lib/scudo/scudo_tls_linux.cpp =================================================================== --- lib/scudo/scudo_tls_linux.cpp +++ lib/scudo/scudo_tls_linux.cpp @@ -14,7 +14,7 @@ #include "sanitizer_common/sanitizer_platform.h" -#if SANITIZER_LINUX +#if SANITIZER_LINUX && !SANITIZER_ANDROID #include "scudo_tls.h" @@ -26,8 +26,10 @@ static pthread_once_t GlobalInitialized = PTHREAD_ONCE_INIT; static pthread_key_t PThreadKey; -thread_local ThreadState ScudoThreadState = ThreadNotInitialized; -thread_local ScudoThreadContext ThreadLocalContext; +__attribute__((tls_model("initial-exec"))) +THREADLOCAL ThreadState ScudoThreadState = ThreadNotInitialized; +__attribute__((tls_model("initial-exec"))) +THREADLOCAL ScudoThreadContext ThreadLocalContext; static void teardownThread(void *Ptr) { uptr Iteration = reinterpret_cast(Ptr); @@ -59,4 +61,4 @@ } // namespace __scudo -#endif // SANITIZER_LINUX +#endif // SANITIZER_LINUX && !SANITIZER_ANDROID