NOTE(review): the header names inside the #include <...> lines and all
indentation were missing from the copy of this patch under review.
<cpuid.h>, <sys/auxv.h> and <asm/hwcap.h> are inferred from the identifiers
the new code uses; the two context includes (<stdint.h>, <string.h>) and the
context-line whitespace are a best guess -- confirm against the tree (or
apply with --ignore-whitespace) before landing.

diff --git a/compiler-rt/lib/builtins/atomic.c b/compiler-rt/lib/builtins/atomic.c
--- a/compiler-rt/lib/builtins/atomic.c
+++ b/compiler-rt/lib/builtins/atomic.c
@@ -27,6 +27,15 @@
 #include <stdint.h>
 #include <string.h>
 
+#if defined(__x86_64__) || defined(__i386__)
+#include <cpuid.h>
+#endif
+
+#if defined(__aarch64__)
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+#endif
+
 #include "assembly.h"
 
 // Clang objects if you redefine a builtin. This little hack allows us to
@@ -120,14 +129,174 @@
   return locks + (hash & SPINLOCK_MASK);
 }
 
-/// Macros for determining whether a size is lock free. Clang can not yet
-/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are
-/// not lock free.
+#if defined(__x86_64__) || defined(__i386__)
+
+#ifdef __x86_64__
+#define FEAT_REG ecx
+#define MASK bit_CMPXCHG16B
+#else
+#define FEAT_REG edx
+#define MASK bit_CMPXCHG8B
+#endif
+
+/// Query CPUID leaf 1 for the compare-and-swap feature bit of this target:
+/// bit_CMPXCHG16B in ECX on x86-64, bit_CMPXCHG8B in EDX on i386.
+static inline bool check_x86_atomic_cas(void) {
+  unsigned int eax, ebx, ecx = 0, edx = 0;
+  __get_cpuid(1, &eax, &ebx, &ecx, &edx);
+  return (FEAT_REG & MASK) != 0;
+}
+
+/// Return true if an N-byte compare-and-swap is available. The CPUID result
+/// is computed on the first call and cached in a function-local static.
+static inline bool have_cas(int N) {
+  static int __have_atomic_cas = -1;
+  if (__have_atomic_cas == -1) {
+    __have_atomic_cas = check_x86_atomic_cas() != 0 ? 1 : 0;
+  }
+  switch (N) {
+  case 1:
+  case 2:
+  case 4:
+    return true;
+  case 8:
+#ifdef __x86_64__
+    return true; // 64-bit CMPXCHG is baseline on x86-64.
+  case 16:
+#endif
+    return __have_atomic_cas;
+  }
+  return false;
+}
+#elif defined(__aarch64__)
+/// Return true if an N-byte (N <= 8) compare-and-swap is available, as
+/// reported by the HWCAP_ATOMICS bit of getauxval(AT_HWCAP). The result is
+/// computed on the first call and cached in a function-local static.
+static inline bool have_cas(int N) {
+  static int __have_atomic_cap = -1;
+  if (__have_atomic_cap == -1) {
+    __have_atomic_cap = (getauxval(AT_HWCAP) & HWCAP_ATOMICS) != 0 ? 1 : 0;
+  }
+  switch (N) {
+  case 1:
+  case 2:
+  case 4:
+  case 8:
+    return __have_atomic_cap;
+  }
+  return false;
+}
+#elif defined(__arm__)
+/// No runtime detection on 32-bit ARM yet: always reports no CAS.
+static inline bool have_cas(int N) {
+  switch (N) {
+  case 1:
+  case 2:
+  case 4:
+  case 8:
+    return false; // FIXME: not sure the check similar to aarch64 works
+  }
+  return false;
+}
+#else
+/// Unknown target: conservatively report that no CAS is available.
+static inline bool have_cas(int N) {
+  (void)N; // Named: omitting a parameter name is only valid C since C23.
+  return false;
+}
+#endif
+
+// The following CHECK_LOCK_FREE* macros are for checking if the given
+// size and alignment could be lock free for non power-of-2 and power-of-2
+// sizes, respectively.
+//
+// Return true if it could positively be determined to be lock free.
+// Otherwise, fall through to the next bucket (next power-of-2).
+//
+// Both macros use the `size` and `ptr` variables of the enclosing function
+// and may return from it.
+#define CHECK_LOCK_FREE(N) \
+  do { \
+    if (!have_cas(N)) \
+      break; \
+    uintptr_t r = (uintptr_t)ptr & ((N) - 1); \
+    if (size <= (N) - r) \
+      return true; \
+  } while (0)
+
+#define CHECK_LOCK_FREE_POW2(N) \
+  do { \
+    uintptr_t r = (uintptr_t)ptr & ((N) - 1); \
+    if (r != 0) \
+      break; \
+    if (__atomic_always_lock_free(N, 0)) \
+      return true; \
+    if (have_cas(N)) \
+      return true; \
+  } while (0)
+
+/// Runtime check for whether an object of `size` bytes at address `ptr` can
+/// be accessed lock free. Size 0 is always lock free. A power-of-2 size is
+/// first tested against its own bucket; sizes below 16 then fall through to
+/// the wider buckets (4, 8, 16 bytes) in turn. Returns false when no bucket
+/// positively confirms lock freedom, and for any size above 16.
+bool __atomic_is_lock_free(unsigned long size, const volatile void *ptr) {
+  switch (size) {
+  case 0:
+    return true;
+  case 1:
+    CHECK_LOCK_FREE_POW2(1);
+    goto L3;
+  case 2:
+    CHECK_LOCK_FREE_POW2(2); // fall through
+  case 3:
+  L3:
+    CHECK_LOCK_FREE(4);
+    goto L5_7;
+
+  case 4:
+    CHECK_LOCK_FREE_POW2(4); // fall through
+  case 5:
+  case 6:
+  case 7:
+  L5_7:
+    CHECK_LOCK_FREE(8);
+    goto L9_15;
+
+  case 8:
+    CHECK_LOCK_FREE_POW2(8); // fall through
+  case 9:
+  case 10:
+  case 11:
+  case 12:
+  case 13:
+  case 14:
+  case 15:
+  L9_15:
+    CHECK_LOCK_FREE(16);
+    break;
+
+  case 16:
+    CHECK_LOCK_FREE_POW2(16);
+    break;
+  }
+  return false;
+}
+
+/// Macros for determining whether a size is lock free.
 #define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1)
 #define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2)
 #define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4)
 #define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8)
-#define IS_LOCK_FREE_16 0
+/// __uint128_t only exists where the compiler defines __SIZEOF_INT128__, so
+/// the 16-byte case is compiled out on targets without it.
+#ifdef __SIZEOF_INT128__
+#define IS_LOCK_FREE_16 __c11_atomic_is_lock_free(16)
+#define LOCK_FREE_ACTION_16 LOCK_FREE_ACTION(__uint128_t)
+#else
+#define IS_LOCK_FREE_16 0
+#define LOCK_FREE_ACTION_16
+#endif
 
 /// Macro that calls the compiler-generated lock-free versions of functions
 /// when they exist.
@@ -156,8 +325,7 @@
       break; \
     case 16: \
       if (IS_LOCK_FREE_16) { \
-        /* FIXME: __uint128_t isn't available on 32 bit platforms. \
-           LOCK_FREE_ACTION(__uint128_t);*/ \
+        LOCK_FREE_ACTION_16; \
       } \
       break; \
     } \