Index: compiler-rt/lib/builtins/atomic.c
===================================================================
--- compiler-rt/lib/builtins/atomic.c
+++ compiler-rt/lib/builtins/atomic.c
@@ -122,36 +122,60 @@
   return locks + (hash & SPINLOCK_MASK);
 }
 
-/// Macros for determining whether a size is lock free. Clang can not yet
-/// codegen __atomic_is_lock_free(16), so for now we assume 16-byte values are
-/// not lock free.
-#define IS_LOCK_FREE_1 __c11_atomic_is_lock_free(1)
-#define IS_LOCK_FREE_2 __c11_atomic_is_lock_free(2)
-#define IS_LOCK_FREE_4 __c11_atomic_is_lock_free(4)
-#define IS_LOCK_FREE_8 __c11_atomic_is_lock_free(8)
-#define IS_LOCK_FREE_16 0
+// x86 guarantees all operations except load and store will be atomic on
+// misaligned pointers too (with possible performance penalties), and that's
+// baked into the ABI.
+#if defined(__x86_64__) || defined(__i386__)
+#define LOCK_FREE_MISALIGNED_ATOMICS 1
+#else
+#define LOCK_FREE_MISALIGNED_ATOMICS 0
+#endif
+
+/// Macros for determining whether a size is lock free.
+/// __c11_atomic_is_lock_free(16) is sometimes a dynamic property (e.g. not all
+/// x86_64 CPUs have cmpxchg16b), but compiler-rt does not implement the
+/// required libcall yet, so we use an approximation that is valid on all CPUs
+/// where it is a static property.
+#define IS_LOCK_FREE_1(ptr) __c11_atomic_is_lock_free(1)
+#define IS_LOCK_FREE_2(ptr) (__c11_atomic_is_lock_free(2) && (LOCK_FREE_MISALIGNED_ATOMICS || (uintptr_t)ptr % 2 == 0))
+#define IS_LOCK_FREE_4(ptr) (__c11_atomic_is_lock_free(4) && (LOCK_FREE_MISALIGNED_ATOMICS || (uintptr_t)ptr % 4 == 0))
+#define IS_LOCK_FREE_8(ptr) (__c11_atomic_is_lock_free(8) && (LOCK_FREE_MISALIGNED_ATOMICS || (uintptr_t)ptr % 8 == 0))
+#define IS_LOCK_FREE_16(ptr) (__atomic_always_lock_free(16, 0) && (LOCK_FREE_MISALIGNED_ATOMICS || (uintptr_t)ptr % 16 == 0))
+
+// Most 32-bit platforms do not have __int128_t, but fortunately they also
+// don't support 16-byte atomics, so in that case the horribly wrong use of
+// uint64_t will never actually happen.
+#if defined(__SIZEOF_INT128__)
+#define LOCK_FREE_16_TYPE __uint128_t
+#else
+_Static_assert(!IS_LOCK_FREE_16(0), "lock free 16-byte atomic attempted but nothing to implement it with");
+#define LOCK_FREE_16_TYPE uint64_t
+#endif
 
 /// Macro that calls the compiler-generated lock-free versions of functions
 /// when they exist.
-#define LOCK_FREE_CASES() \
+#define LOCK_FREE_CASES(ptr) \
   do {\
   switch (size) {\
+    case 1:\
+      if (IS_LOCK_FREE_1(ptr)) {\
+        LOCK_FREE_ACTION(uint8_t);\
+      }\
     case 2:\
-      if (IS_LOCK_FREE_2) {\
+      if (IS_LOCK_FREE_2(ptr)) {\
        LOCK_FREE_ACTION(uint16_t);\
       }\
     case 4:\
-      if (IS_LOCK_FREE_4) {\
+      if (IS_LOCK_FREE_4(ptr)) {\
         LOCK_FREE_ACTION(uint32_t);\
       }\
     case 8:\
-      if (IS_LOCK_FREE_8) {\
+      if (IS_LOCK_FREE_8(ptr)) {\
         LOCK_FREE_ACTION(uint64_t);\
       }\
     case 16:\
-      if (IS_LOCK_FREE_16) {\
-      /* FIXME: __uint128_t isn't available on 32 bit platforms.
-        LOCK_FREE_ACTION(__uint128_t);*/\
+      if (IS_LOCK_FREE_16(ptr)) {\
+        LOCK_FREE_ACTION(LOCK_FREE_16_TYPE);\
       }\
   }\
   } while (0)
@@ -160,10 +184,22 @@
 /// An atomic load operation. This is atomic with respect to the source
 /// pointer only.
 void __atomic_load_c(int size, void *src, void *dest, int model) {
+#if LOCK_FREE_MISALIGNED_ATOMICS
+#define LOCK_FREE_ACTION(type) \
+  if ((uintptr_t)src % size == 0)\
+    *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\
+  else {\
+    type tmp = 0;\
+    __c11_atomic_compare_exchange_weak((_Atomic(type)*)src, &tmp, 0, model, model);\
+    *((type*)dest) = tmp;\
+  }\
+  return;
+#else
 #define LOCK_FREE_ACTION(type) \
   *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\
   return;
-  LOCK_FREE_CASES();
+#endif
+  LOCK_FREE_CASES(src);
 #undef LOCK_FREE_ACTION
   Lock *l = lock_for_pointer(src);
   lock(l);
@@ -174,10 +210,19 @@
 /// An atomic store operation. This is atomic with respect to the destination
 /// pointer only.
 void __atomic_store_c(int size, void *dest, void *src, int model) {
-#define LOCK_FREE_ACTION(type) \
+#if LOCK_FREE_MISALIGNED_ATOMICS
+#define LOCK_FREE_ACTION(type)\
+  if ((uintptr_t)dest % size == 0)\
+    __c11_atomic_store((_Atomic(type)*)dest, *(type*)dest, model);\
+  else\
+    __c11_atomic_exchange((_Atomic(type)*)dest, *(type *)dest, model);\
+  return;
+#else
+#define LOCK_FREE_ACTION(type) \
   __c11_atomic_store((_Atomic(type)*)dest, *(type*)dest, model);\
   return;
-  LOCK_FREE_CASES();
+#endif
+  LOCK_FREE_CASES(dest);
 #undef LOCK_FREE_ACTION
   Lock *l = lock_for_pointer(dest);
   lock(l);
@@ -195,7 +240,7 @@
 #define LOCK_FREE_ACTION(type) \
   return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\
     *(type*)desired, success, failure)
-  LOCK_FREE_CASES();
+  LOCK_FREE_CASES(ptr);
 #undef LOCK_FREE_ACTION
   Lock *l = lock_for_pointer(ptr);
   lock(l);
@@ -216,7 +261,7 @@
   *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\
     model);\
   return;
-  LOCK_FREE_CASES();
+  LOCK_FREE_CASES(ptr);
 #undef LOCK_FREE_ACTION
   Lock *l = lock_for_pointer(ptr);
   lock(l);
@@ -246,7 +291,7 @@
 
 #define OPTIMISED_CASE(n, lockfree, type)\
 type __atomic_load_##n(type *src, int model) {\
-  if (lockfree)\
+  if (lockfree(src))\
     return __c11_atomic_load((_Atomic(type)*)src, model);\
   Lock *l = lock_for_pointer(src);\
   lock(l);\
@@ -259,7 +304,7 @@
 
 #define OPTIMISED_CASE(n, lockfree, type)\
 void __atomic_store_##n(type *dest, type val, int model) {\
-  if (lockfree) {\
+  if (lockfree(dest)) {\
     __c11_atomic_store((_Atomic(type)*)dest, val, model);\
     return;\
   }\
@@ -274,7 +319,7 @@
 
 #define OPTIMISED_CASE(n, lockfree, type)\
 type __atomic_exchange_##n(type *dest, type val, int model) {\
-  if (lockfree)\
+  if (lockfree(dest))\
     return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\
   Lock *l = lock_for_pointer(dest);\
   lock(l);\
@@ -289,7 +334,7 @@
 #define OPTIMISED_CASE(n, lockfree, type)\
 int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\
     int success, int failure) {\
-  if (lockfree)\
+  if (lockfree(ptr))\
     return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\
       success, failure);\
   Lock *l = lock_for_pointer(ptr);\
   lock(l);\
@@ -311,7 +356,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 #define ATOMIC_RMW(n, lockfree, type, opname, op) \
 type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\
-  if (lockfree) \
+  if (lockfree(ptr))\
     return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\
   Lock *l = lock_for_pointer(ptr);\
   lock(l);\
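
For context, not part of the patch: the per-pointer alignment checks above only matter when the compiler routes an atomic access through these libcalls, which it does when it cannot prove the object is sufficiently aligned. Below is a minimal sketch of such a caller, assuming Clang/GCC builtins; the struct and function names are hypothetical and only illustrate the code path.

#include <stdint.h>

// Hypothetical wire format: `value` sits at offset 1, so it is only 1-byte
// aligned and an ordinary inline atomic load cannot be used.
struct __attribute__((packed)) wire_header {
  uint8_t tag;
  uint32_t value;
};

uint32_t read_value(struct wire_header *h) {
  uint32_t out;
  // Because the member is under-aligned, Clang typically lowers this to the
  // generic __atomic_load libcall, i.e. __atomic_load_c above. With this
  // patch, x86 still takes the lock-free path (LOCK_FREE_MISALIGNED_ATOMICS
  // short-circuits the alignment test) and the misaligned load goes through
  // the compare-exchange branch; other targets fall back to the spinlock
  // because (uintptr_t)&h->value % 4 != 0 makes IS_LOCK_FREE_4 false.
  __atomic_load(&h->value, &out, __ATOMIC_SEQ_CST);
  return out;
}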