diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h
--- a/compiler-rt/lib/scudo/standalone/common.h
+++ b/compiler-rt/lib/scudo/standalone/common.h
@@ -112,21 +112,6 @@
   *RandState = State;
 }
 
-// Hardware specific inlinable functions.
-
-inline void yieldProcessor(UNUSED u8 Count) {
-#if defined(__i386__) || defined(__x86_64__)
-  __asm__ __volatile__("" ::: "memory");
-  for (u8 I = 0; I < Count; I++)
-    __asm__ __volatile__("pause");
-#elif defined(__aarch64__) || defined(__arm__)
-  __asm__ __volatile__("" ::: "memory");
-  for (u8 I = 0; I < Count; I++)
-    __asm__ __volatile__("yield");
-#endif
-  __asm__ __volatile__("" ::: "memory");
-}
-
 // Platform specific functions.
 
 extern uptr PageSizeCached;
diff --git a/compiler-rt/lib/scudo/standalone/mutex.h b/compiler-rt/lib/scudo/standalone/mutex.h
--- a/compiler-rt/lib/scudo/standalone/mutex.h
+++ b/compiler-rt/lib/scudo/standalone/mutex.h
@@ -35,7 +35,7 @@
 #pragma nounroll
 #endif
     for (u8 I = 0U; I < NumberOfTries; I++) {
-      yieldProcessor(NumberOfYields);
+      delayLoop();
       if (tryLock())
         return;
     }
@@ -53,10 +53,26 @@
   }
 
 private:
+  // Busy-waits for a short, fixed number of iterations. Called between
+  // successive tryLock() attempts in the bounded retry loop.
+  void delayLoop() {
+    // The value comes from the average time spent in accessing caches (which
+    // are the fastest operations) so that we are unlikely to wait too long for
+    // fast operations.
+    constexpr u32 SpinTimes = 16;
+    // `volatile` makes every access observable so the loop is not elided.
+    volatile u32 V = 0;
+    // Use an explicit read-then-write instead of `++V`: incrementing a
+    // volatile-qualified object is deprecated as of C++20.
+    for (u32 I = 0; I < SpinTimes; ++I)
+      V = V + 1;
+  }
+
   void assertHeldImpl();
 
-  static constexpr u8 NumberOfTries = 8U;
-  static constexpr u8 NumberOfYields = 8U;
+  // TODO(chiahungduan): Adapt this value based on scenarios. E.g., primary and
+  // secondary allocator have different allocation times.
+  static constexpr u8 NumberOfTries = 32U;
 
 #if SCUDO_LINUX
   atomic_u32 M = {};