Index: lib/scudo/scudo_allocator.cpp
===================================================================
--- lib/scudo/scudo_allocator.cpp
+++ lib/scudo/scudo_allocator.cpp
@@ -388,9 +388,11 @@
     if (PrimaryAllocator::CanAllocate(AlignedSize, MinAlignment)) {
       BackendSize = AlignedSize;
       ClassId = SizeClassMap::ClassID(BackendSize);
-      ScudoTSD *TSD = getTSDAndLock();
+      bool UnlockRequired;
+      ScudoTSD *TSD = getTSDAndLock(&UnlockRequired);
       BackendPtr = BackendAllocator.allocatePrimary(&TSD->Cache, ClassId);
-      TSD->unlock();
+      if (UnlockRequired)
+        TSD->unlock();
     } else {
       BackendSize = NeededSize;
       ClassId = 0;
@@ -447,10 +449,12 @@
   Chunk::eraseHeader(Ptr);
   void *BackendPtr = Chunk::getBackendPtr(Ptr, Header);
   if (Header->ClassId) {
-    ScudoTSD *TSD = getTSDAndLock();
+    bool UnlockRequired;
+    ScudoTSD *TSD = getTSDAndLock(&UnlockRequired);
     getBackendAllocator().deallocatePrimary(&TSD->Cache, BackendPtr,
                                             Header->ClassId);
-    TSD->unlock();
+    if (UnlockRequired)
+      TSD->unlock();
   } else {
     getBackendAllocator().deallocateSecondary(BackendPtr);
   }
@@ -464,11 +468,13 @@
     UnpackedHeader NewHeader = *Header;
     NewHeader.State = ChunkQuarantine;
     Chunk::compareExchangeHeader(Ptr, &NewHeader, Header);
-    ScudoTSD *TSD = getTSDAndLock();
+    bool UnlockRequired;
+    ScudoTSD *TSD = getTSDAndLock(&UnlockRequired);
     AllocatorQuarantine.Put(getQuarantineCache(TSD),
                             QuarantineCallback(&TSD->Cache), Ptr,
                             EstimatedSize);
-    TSD->unlock();
+    if (UnlockRequired)
+      TSD->unlock();
   }
 }

@@ -612,8 +618,7 @@
   Instance.init();
 }
 
-void ScudoTSD::init(bool Shared) {
-  UnlockRequired = Shared;
+void ScudoTSD::init() {
   getBackendAllocator().initCache(&Cache);
   memset(QuarantineCachePlaceHolder, 0, sizeof(QuarantineCachePlaceHolder));
 }
Index: lib/scudo/scudo_tsd.h
===================================================================
--- lib/scudo/scudo_tsd.h
+++ lib/scudo/scudo_tsd.h
@@ -23,11 +23,11 @@
 
 namespace __scudo {
 
-struct ALIGNED(64) ScudoTSD {
+struct ALIGNED(SANITIZER_CACHE_LINE_SIZE) ScudoTSD {
   AllocatorCache Cache;
   uptr QuarantineCachePlaceHolder[4];
 
-  void init(bool Shared);
+  void init();
   void commitBack();
 
   INLINE bool tryLock() {
@@ -36,29 +36,23 @@
       return true;
     }
     if (atomic_load_relaxed(&Precedence) == 0)
-      atomic_store_relaxed(&Precedence, MonotonicNanoTime());
+      atomic_store_relaxed(&Precedence, static_cast<uptr>(
+          MonotonicNanoTime() >> FIRST_32_SECOND_64(16, 0)));
     return false;
   }
 
   INLINE void lock() {
-    Mutex.Lock();
     atomic_store_relaxed(&Precedence, 0);
+    Mutex.Lock();
   }
 
-  INLINE void unlock() {
-    if (!UnlockRequired)
-      return;
-    Mutex.Unlock();
-  }
+  INLINE void unlock() { Mutex.Unlock(); }
 
-  INLINE u64 getPrecedence() {
-    return atomic_load_relaxed(&Precedence);
-  }
+  INLINE uptr getPrecedence() { return atomic_load_relaxed(&Precedence); }
 
  private:
-  bool UnlockRequired;
   StaticSpinMutex Mutex;
-  atomic_uint64_t Precedence;
+  atomic_uintptr_t Precedence;
 };
 
 void initThread(bool MinimalInit);
Index: lib/scudo/scudo_tsd_exclusive.cpp
===================================================================
--- lib/scudo/scudo_tsd_exclusive.cpp
+++ lib/scudo/scudo_tsd_exclusive.cpp
@@ -50,7 +50,7 @@
 static void initOnce() {
   CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread), 0);
   initScudo();
-  FallbackTSD.init(/*Shared=*/true);
+  FallbackTSD.init();
 }
 
 void initThread(bool MinimalInit) {
@@ -59,7 +59,7 @@
     return;
   CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast<void *>(
       GetPthreadDestructorIterations())), 0);
-  TSD.init(/*Shared=*/false);
+  TSD.init();
   ScudoThreadState = ThreadInitialized;
 }
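The recurring pattern in the scudo_allocator.cpp hunks above is worth spelling out: instead of every ScudoTSD carrying an UnlockRequired member that unlock() must test on every call, getTSDAndLock() now tells the caller, through an out-parameter, whether the TSD it handed back was actually locked. A minimal standalone sketch of the pattern follows; it is not the actual Scudo sources, and the Mutex/Cache types are simplified stand-ins:

// Sketch of the caller-side UnlockRequired pattern (exclusive TSD model).
#include <mutex>

struct TSD {
  std::mutex Mutex;
  int Cache = 0;  // Stand-in for the real AllocatorCache.
};

thread_local TSD ThreadTSD;
thread_local bool ThreadInitialized = false;
TSD FallbackTSD;

// A thread-local TSD needs no lock; only the shared fallback does, and the
// caller learns which case it got instead of the TSD remembering it.
TSD *getTSDAndLock(bool *UnlockRequired) {
  if (!ThreadInitialized) {
    FallbackTSD.Mutex.lock();
    *UnlockRequired = true;
    return &FallbackTSD;
  }
  *UnlockRequired = false;
  return &ThreadTSD;
}

void allocateFromCache() {
  bool UnlockRequired;
  TSD *T = getTSDAndLock(&UnlockRequired);
  T->Cache++;  // Work with the per-thread cache.
  if (UnlockRequired)
    T->Mutex.unlock();
}

In the exclusive model the common case is the unlocked thread-local TSD, so the fast path no longer pays for a member load and a branch inside unlock(), and the bool disappears from the ScudoTSD layout entirely.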
Index: lib/scudo/scudo_tsd_exclusive.inc
===================================================================
--- lib/scudo/scudo_tsd_exclusive.inc
+++ lib/scudo/scudo_tsd_exclusive.inc
@@ -35,11 +35,13 @@
     initThread(MinimalInit);
 }
 
-ALWAYS_INLINE ScudoTSD *getTSDAndLock() {
+ALWAYS_INLINE ScudoTSD *getTSDAndLock(bool *UnlockRequired) {
   if (UNLIKELY(ScudoThreadState != ThreadInitialized)) {
     FallbackTSD.lock();
+    *UnlockRequired = true;
     return &FallbackTSD;
   }
+  *UnlockRequired = false;
   return &TSD;
 }
Index: lib/scudo/scudo_tsd_shared.cpp
===================================================================
--- lib/scudo/scudo_tsd_shared.cpp
+++ lib/scudo/scudo_tsd_shared.cpp
@@ -23,6 +23,13 @@
 static atomic_uint32_t CurrentIndex;
 static ScudoTSD *TSDs;
 static u32 NumberOfTSDs;
+static u32 CoPrimes[SCUDO_SHARED_TSD_POOL_SIZE];
+static u32 NumberOfCoPrimes = 0;
+
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+__attribute__((tls_model("initial-exec")))
+THREADLOCAL ScudoTSD *CurrentTSD;
+#endif
 
 static void initOnce() {
   CHECK_EQ(pthread_key_create(&PThreadKey, NULL), 0);
@@ -31,13 +38,21 @@
                            static_cast<u32>(SCUDO_SHARED_TSD_POOL_SIZE));
   TSDs = reinterpret_cast<ScudoTSD *>(
       MmapOrDie(sizeof(ScudoTSD) * NumberOfTSDs, "ScudoTSDs"));
-  for (u32 i = 0; i < NumberOfTSDs; i++)
-    TSDs[i].init(/*Shared=*/true);
+  for (u32 I = 0; I < NumberOfTSDs; I++) {
+    TSDs[I].init();
+    u32 A = I + 1;
+    u32 B = NumberOfTSDs;
+    while (B != 0) { const u32 T = A; A = B; B = T % B; }
+    if (A == 1)
+      CoPrimes[NumberOfCoPrimes++] = I + 1;
+  }
 }
 
 ALWAYS_INLINE void setCurrentTSD(ScudoTSD *TSD) {
 #if SANITIZER_ANDROID
   *get_android_tls_ptr() = reinterpret_cast<uptr>(TSD);
+#elif SANITIZER_LINUX
+  CurrentTSD = TSD;
 #else
   CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast<void *>(TSD)), 0);
 #endif  // SANITIZER_ANDROID
@@ -51,33 +66,41 @@
 }
 
 ScudoTSD *getTSDAndLockSlow() {
-  ScudoTSD *TSD;
+  ScudoTSD *TSD = getCurrentTSD();
   if (NumberOfTSDs > 1) {
-    // Go through all the contexts and find the first unlocked one.
-    for (u32 i = 0; i < NumberOfTSDs; i++) {
-      TSD = &TSDs[i];
-      if (TSD->tryLock()) {
-        setCurrentTSD(TSD);
-        return TSD;
+    u32 RandState = static_cast<u32>(TSD->getPrecedence());
+    const u32 R = Rand(&RandState);
+    const u32 Inc = CoPrimes[R % NumberOfCoPrimes];
+    u32 Index = R % NumberOfTSDs;
+    uptr LowestPrecedence = UINTPTR_MAX;
+    ScudoTSD *CandidateTSD = nullptr;
+    // Go randomly through at most 4 contexts and find a candidate.
+    for (u32 I = 0; I < Min(4U, NumberOfTSDs); I++) {
+      if (&TSDs[Index] == TSD)
+        continue;
+      if (TSDs[Index].tryLock()) {
+        setCurrentTSD(&TSDs[Index]);
+        return &TSDs[Index];
       }
-    }
-    // No luck, find the one with the lowest Precedence, and slow lock it.
-    u64 LowestPrecedence = UINT64_MAX;
-    for (u32 i = 0; i < NumberOfTSDs; i++) {
-      u64 Precedence = TSDs[i].getPrecedence();
-      if (Precedence && Precedence < LowestPrecedence) {
-        TSD = &TSDs[i];
+      const uptr Precedence = TSDs[Index].getPrecedence();
+      // A 0 precedence here means another thread just locked this TSD.
+      if (UNLIKELY(Precedence == 0))
+        continue;
+      if (Precedence < LowestPrecedence) {
+        CandidateTSD = &TSDs[Index];
         LowestPrecedence = Precedence;
       }
+      Index += Inc;
+      if (Index >= NumberOfTSDs)
+        Index -= NumberOfTSDs;
     }
-    if (LIKELY(LowestPrecedence != UINT64_MAX)) {
-      TSD->lock();
-      setCurrentTSD(TSD);
-      return TSD;
+    if (CandidateTSD) {
+      CandidateTSD->lock();
+      setCurrentTSD(CandidateTSD);
+      return CandidateTSD;
     }
   }
   // Last resort, stick with the current one.
-  TSD = getCurrentTSD();
   TSD->lock();
   return TSD;
 }
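The initOnce()/getTSDAndLockSlow() pair above leans on a small number-theory fact: a stride that is co-prime with the pool size N visits all N slots exactly once before repeating, so a random start plus a random co-prime stride gives a cheap randomized tour of the TSD pool with no duplicates. A self-contained illustration (hypothetical values, not the Scudo sources; N stands in for NumberOfTSDs):

// Demonstrates the co-prime table construction and the stride walk.
#include <cstdio>

int main() {
  const unsigned N = 8;  // Stand-in for NumberOfTSDs.
  unsigned CoPrimes[N], NumberOfCoPrimes = 0;
  for (unsigned I = 0; I < N; I++) {
    // Euclid's algorithm, as in initOnce(): A ends up holding gcd(I + 1, N),
    // and I + 1 is kept as a valid stride when that gcd is 1.
    unsigned A = I + 1, B = N;
    while (B != 0) { const unsigned T = A; A = B; B = T % B; }
    if (A == 1)
      CoPrimes[NumberOfCoPrimes++] = I + 1;
  }
  // For N == 8 this prints: 1 3 5 7
  for (unsigned I = 0; I < NumberOfCoPrimes; I++)
    printf("%u ", CoPrimes[I]);
  printf("\n");
  // Walk from an arbitrary start with one of the strides; all 8 slots are
  // visited exactly once: 5 0 3 6 1 4 7 2
  unsigned Index = 5, Inc = CoPrimes[1];  // Start 5, stride 3.
  for (unsigned I = 0; I < N; I++) {
    printf("%u ", Index);
    Index += Inc;
    if (Index >= N)
      Index -= N;  // Conditional subtraction, cheaper than a modulo.
  }
  printf("\n");
  return 0;
}

The patch caps the walk at 4 probes rather than N, trading exhaustive search for bounded latency: after 4 failed tryLock() attempts it falls back to the least recently contended candidate, or to blocking on the current TSD.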
Index: lib/scudo/scudo_tsd_shared.inc
===================================================================
--- lib/scudo/scudo_tsd_shared.inc
+++ lib/scudo/scudo_tsd_shared.inc
@@ -19,9 +19,16 @@
 extern pthread_key_t PThreadKey;
 
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+__attribute__((tls_model("initial-exec")))
+extern THREADLOCAL ScudoTSD *CurrentTSD;
+#endif
+
 ALWAYS_INLINE ScudoTSD* getCurrentTSD() {
 #if SANITIZER_ANDROID
   return reinterpret_cast<ScudoTSD *>(*get_android_tls_ptr());
+#elif SANITIZER_LINUX
+  return CurrentTSD;
 #else
   return reinterpret_cast<ScudoTSD *>(pthread_getspecific(PThreadKey));
 #endif  // SANITIZER_ANDROID
 }
@@ -35,9 +42,10 @@
 
 ScudoTSD *getTSDAndLockSlow();
 
-ALWAYS_INLINE ScudoTSD *getTSDAndLock() {
+ALWAYS_INLINE ScudoTSD *getTSDAndLock(bool *UnlockRequired) {
   ScudoTSD *TSD = getCurrentTSD();
-  CHECK(TSD && "No TSD associated with the current thread!");
+  DCHECK(TSD && "No TSD associated with the current thread!");
+  *UnlockRequired = true;
   // Try to lock the currently associated context.
   if (TSD->tryLock())
     return TSD;
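Finally, the Precedence field is what ties the fast and slow paths together: a failed tryLock() stamps the TSD with a coarse timestamp, lock() clears it just before blocking, and getTSDAndLockSlow() prefers the TSD with the lowest non-zero stamp, i.e. the one that has been contended the longest. A sketch of the scheme using standard C++ atomics and clocks in place of the sanitizer_common primitives (simplified, not the actual sources):

// Sketch of the Precedence stamping scheme from scudo_tsd.h above.
#include <atomic>
#include <chrono>
#include <cstdint>
#include <mutex>

struct TSD {
  bool tryLock() {
    if (Mutex.try_lock()) {
      Precedence.store(0, std::memory_order_relaxed);
      return true;
    }
    // Only stamp when unset, so the value records the *oldest* failed
    // attempt rather than the most recent one.
    if (Precedence.load(std::memory_order_relaxed) == 0)
      Precedence.store(now(), std::memory_order_relaxed);
    return false;
  }
  void lock() {
    // Clear the stamp before blocking, as the patch does: a zero stamp on a
    // locked TSD tells the slow path a thread is already committed to it.
    Precedence.store(0, std::memory_order_relaxed);
    Mutex.lock();
  }
  void unlock() { Mutex.unlock(); }
  uintptr_t getPrecedence() {
    return Precedence.load(std::memory_order_relaxed);
  }

private:
  static uintptr_t now() {
    // The patch shifts MonotonicNanoTime() right by 16 on 32-bit targets so
    // the value fits a uptr; a raw nanosecond count is enough for a sketch.
    return static_cast<uintptr_t>(
        std::chrono::steady_clock::now().time_since_epoch().count());
  }
  std::mutex Mutex;
  std::atomic<uintptr_t> Precedence{0};
};

This also explains the "A 0 precedence here means another thread just locked this TSD" check in getTSDAndLockSlow(): when tryLock() has just failed but the stamp reads zero, some other thread either just acquired the TSD or is already committed to acquiring it, so it is skipped as a candidate.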