Index: lib/asan/asan_allocator.h
===================================================================
--- lib/asan/asan_allocator.h
+++ lib/asan/asan_allocator.h
@@ -90,15 +90,20 @@
   uptr size_;
 };
 
-struct AsanThreadLocalMallocStorage {
-  uptr quarantine_cache[16];
+struct AsanThreadLocalMallocStorageUnaligned {
+  // Allocator cache must be 8-byte aligned for 64-bit atomic operations,
+  // so it comes first.
   uptr allocator2_cache[96 * (512 * 8 + 16)];  // Opaque.
+  uptr quarantine_cache[16];
   void CommitBack();
  private:
   // These objects are allocated via mmap() and are zero-initialized.
-  AsanThreadLocalMallocStorage() {}
+  AsanThreadLocalMallocStorageUnaligned() {}
 };
 
+typedef ALIGNED(8) AsanThreadLocalMallocStorageUnaligned
+    AsanThreadLocalMallocStorage;
+
 void *asan_memalign(uptr alignment, uptr size, StackTrace *stack,
                     AllocType alloc_type);
 void asan_free(void *ptr, StackTrace *stack, AllocType alloc_type);
Index: lib/sanitizer_common/sanitizer_atomic.h
===================================================================
--- lib/sanitizer_common/sanitizer_atomic.h
+++ lib/sanitizer_common/sanitizer_atomic.h
@@ -42,7 +42,7 @@
   volatile Type val_dont_use;
 };
 
-struct atomic_uint64_t {
+struct atomic_uint64_s {
   typedef u64 Type;
   volatile Type val_dont_use;
 };
@@ -52,6 +52,9 @@
   volatile Type val_dont_use;
 };
 
+// On 32-bit platforms u64 is not necessarily aligned on 8 bytes.
+typedef ALIGNED(8) atomic_uint64_s atomic_uint64_t;
+
 }  // namespace __sanitizer
 
 #if defined(__GNUC__)
Index: lib/sanitizer_common/sanitizer_atomic_clang.h
===================================================================
--- lib/sanitizer_common/sanitizer_atomic_clang.h
+++ lib/sanitizer_common/sanitizer_atomic_clang.h
@@ -15,8 +15,26 @@
 #ifndef SANITIZER_ATOMIC_CLANG_H
 #define SANITIZER_ATOMIC_CLANG_H
 
+#if defined(__i386__) || defined(__x86_64__)
+# include "sanitizer_atomic_clang_x86.h"
+#else
+# include "sanitizer_atomic_clang_other.h"
+#endif
+
 namespace __sanitizer {
 
+// We would like to just use compiler builtin atomic operations
+// for loads and stores, but they are mostly broken in clang:
+// - they lead to vastly inefficient code generation
+// (http://llvm.org/bugs/show_bug.cgi?id=17281)
+// - 64-bit atomic operations are not implemented on x86_32
+// (http://llvm.org/bugs/show_bug.cgi?id=15034)
+// - they are not implemented on ARM
+// error: undefined reference to '__atomic_load_4'
+
+// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+// for mappings of the memory model to different processors.
+
 INLINE void atomic_signal_fence(memory_order) {
   __asm__ __volatile__("" ::: "memory");
 }
@@ -25,59 +43,6 @@
   __sync_synchronize();
 }
 
-INLINE void proc_yield(int cnt) {
-  __asm__ __volatile__("" ::: "memory");
-#if defined(__i386__) || defined(__x86_64__)
-  for (int i = 0; i < cnt; i++)
-    __asm__ __volatile__("pause");
-#endif
-  __asm__ __volatile__("" ::: "memory");
-}
-
-template<typename T>
-INLINE typename T::Type atomic_load(
-    const volatile T *a, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_consume
-      | memory_order_acquire | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-  typename T::Type v;
-  // FIXME:
-  // 64-bit atomic operations are not atomic on 32-bit platforms.
-  // The implementation lacks necessary memory fences on ARM/PPC.
-  // We would like to use compiler builtin atomic operations,
-  // but they are mostly broken:
-  // - they lead to vastly inefficient code generation
-  // (http://llvm.org/bugs/show_bug.cgi?id=17281)
-  // - 64-bit atomic operations are not implemented on x86_32
-  // (http://llvm.org/bugs/show_bug.cgi?id=15034)
-  // - they are not implemented on ARM
-  // error: undefined reference to '__atomic_load_4'
-  if (mo == memory_order_relaxed) {
-    v = a->val_dont_use;
-  } else {
-    atomic_signal_fence(memory_order_seq_cst);
-    v = a->val_dont_use;
-    atomic_signal_fence(memory_order_seq_cst);
-  }
-  return v;
-}
-
-template<typename T>
-INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_release
-      | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-  if (mo == memory_order_relaxed) {
-    a->val_dont_use = v;
-  } else {
-    atomic_signal_fence(memory_order_seq_cst);
-    a->val_dont_use = v;
-    atomic_signal_fence(memory_order_seq_cst);
-  }
-  if (mo == memory_order_seq_cst)
-    atomic_thread_fence(memory_order_seq_cst);
-}
-
 template<typename T>
 INLINE typename T::Type atomic_fetch_add(volatile T *a,
     typename T::Type v, memory_order mo) {
Index: lib/sanitizer_common/sanitizer_atomic_clang_other.h
===================================================================
--- lib/sanitizer_common/sanitizer_atomic_clang_other.h
+++ lib/sanitizer_common/sanitizer_atomic_clang_other.h
@@ -0,0 +1,96 @@
+//===-- sanitizer_atomic_clang_other.h --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+// Not intended for direct inclusion. Include sanitizer_atomic.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
+#define SANITIZER_ATOMIC_CLANG_OTHER_H
+
+namespace __sanitizer {
+
+INLINE void proc_yield(int cnt) {
+  __asm__ __volatile__("" ::: "memory");
+}
+
+template<typename T>
+INLINE typename T::Type atomic_load(
+    const volatile T *a, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_consume
+      | memory_order_acquire | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+  typename T::Type v;
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned loads are atomic.
+    if (mo == memory_order_relaxed) {
+      v = a->val_dont_use;
+    } else if (mo == memory_order_consume) {
+      // Assume that processor respects data dependencies
+      // (and that compiler won't break them).
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __asm__ __volatile__("" ::: "memory");
+    } else if (mo == memory_order_acquire) {
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __sync_synchronize();
+    } else {  // seq_cst
+      // E.g. on POWER we need a hw fence even before the load.
+      __sync_synchronize();
+      v = a->val_dont_use;
+      __sync_synchronize();
+    }
+  } else {
+    // 64-bit load on 32-bit platform.
+    // Gross, but simple and reliable.
+    // Assume that it is not in read-only memory.
+    v = __sync_fetch_and_add((typename T::Type volatile*)&a->val_dont_use, 0);
+  }
+  return v;
+}
+
+template<typename T>
+INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_release
+      | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned stores are atomic.
+    if (mo == memory_order_relaxed) {
+      a->val_dont_use = v;
+    } else if (mo == memory_order_release) {
+      __sync_synchronize();
+      a->val_dont_use = v;
+      __asm__ __volatile__("" ::: "memory");
+    } else {  // seq_cst
+      __sync_synchronize();
+      a->val_dont_use = v;
+      __sync_synchronize();
+    }
+  } else {
+    // 64-bit store on 32-bit platform.
+    // Gross, but simple and reliable.
+    typename T::Type cmp = a->val_dont_use;
+    typename T::Type cur;
+    for (;;) {
+      cur = __sync_val_compare_and_swap(&a->val_dont_use, cmp, v);
+      if (cur == cmp)
+        break;
+      cmp = cur;
+    }
+  }
+}
+
+}  // namespace __sanitizer
+
+#endif  // #ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
Index: lib/sanitizer_common/sanitizer_atomic_clang_x86.h
===================================================================
--- lib/sanitizer_common/sanitizer_atomic_clang_x86.h
+++ lib/sanitizer_common/sanitizer_atomic_clang_x86.h
@@ -0,0 +1,116 @@
+//===-- sanitizer_atomic_clang_x86.h ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
+// Not intended for direct inclusion. Include sanitizer_atomic.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ATOMIC_CLANG_X86_H
+#define SANITIZER_ATOMIC_CLANG_X86_H
+
+namespace __sanitizer {
+
+INLINE void proc_yield(int cnt) {
+  __asm__ __volatile__("" ::: "memory");
+  for (int i = 0; i < cnt; i++)
+    __asm__ __volatile__("pause");
+  __asm__ __volatile__("" ::: "memory");
+}
+
+template<typename T>
+INLINE typename T::Type atomic_load(
+    const volatile T *a, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_consume
+      | memory_order_acquire | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+  typename T::Type v;
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned loads are atomic.
+    if (mo == memory_order_relaxed) {
+      v = a->val_dont_use;
+    } else if (mo == memory_order_consume) {
+      // Assume that processor respects data dependencies
+      // (and that compiler won't break them).
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __asm__ __volatile__("" ::: "memory");
+    } else if (mo == memory_order_acquire) {
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      // On x86 loads are implicitly acquire.
+      __asm__ __volatile__("" ::: "memory");
+    } else {  // seq_cst
+      // On x86 plain MOV suffices for seq_cst loads (stores carry the fence).
+      __asm__ __volatile__("" ::: "memory");
+      v = a->val_dont_use;
+      __asm__ __volatile__("" ::: "memory");
+    }
+  } else {
+    // 64-bit load on 32-bit platform.
+    __asm__ __volatile__(
+        "movq %1, %%mm0;"  // Use mmx reg for 64-bit atomic moves
+        "movq %%mm0, %0;"  // (ptr could be read-only)
+        "emms;"            // Empty mmx state/Reset FP regs
+        : "=m" (v)
+        : "m" (a->val_dont_use)
+        : // mark the FP stack and mmx registers as clobbered
+          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#ifdef __MMX__
+          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+#endif  // #ifdef __MMX__
+          "memory");
+  }
+  return v;
+}
+
+template<typename T>
+INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
+  DCHECK(mo & (memory_order_relaxed | memory_order_release
+      | memory_order_seq_cst));
+  DCHECK(!((uptr)a % sizeof(*a)));
+
+  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
+    // Assume that aligned stores are atomic.
+    if (mo == memory_order_relaxed) {
+      a->val_dont_use = v;
+    } else if (mo == memory_order_release) {
+      // On x86 stores are implicitly release.
+      __asm__ __volatile__("" ::: "memory");
+      a->val_dont_use = v;
+      __asm__ __volatile__("" ::: "memory");
+    } else {  // seq_cst
+      // On x86 stores are implicitly release.
+      __asm__ __volatile__("" ::: "memory");
+      a->val_dont_use = v;
+      __sync_synchronize();
+    }
+  } else {
+    // 64-bit store on 32-bit platform.
+    __asm__ __volatile__(
+        "movq %1, %%mm0;"  // Use mmx reg for 64-bit atomic moves
+        "movq %%mm0, %0;"
+        "emms;"            // Empty mmx state/Reset FP regs
+        : "=m" (a->val_dont_use)
+        : "m" (v)
+        : // mark the FP stack and mmx registers as clobbered
+          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#ifdef __MMX__
+          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+#endif  // #ifdef __MMX__
+          "memory");
+    if (mo == memory_order_seq_cst)
+      __sync_synchronize();
+  }
+}
+
+}  // namespace __sanitizer
+
+#endif  // #ifndef SANITIZER_ATOMIC_CLANG_X86_H
Index: lib/sanitizer_common/tests/sanitizer_atomic_test.cc
===================================================================
--- lib/sanitizer_common/tests/sanitizer_atomic_test.cc
+++ lib/sanitizer_common/tests/sanitizer_atomic_test.cc
@@ -15,6 +15,79 @@
 
 namespace __sanitizer {
 
+template<typename T>
+struct ValAndMagic {
+  typename T::Type magic0;
+  T a;
+  typename T::Type magic1;
+
+  static ValAndMagic<T> *sink;
+};
+
+template<typename T>
+ValAndMagic<T> *ValAndMagic<T>::sink;
+
+template<typename T, memory_order load_mo, memory_order store_mo>
+void CheckStoreLoad() {
+  typedef typename T::Type Type;
+  ValAndMagic<T> val;
+  // Prevent the compiler from scalarizing the struct.
+  ValAndMagic<T>::sink = &val;
+  // Ensure that surrounding memory is not overwritten.
+  val.magic0 = val.magic1 = (Type)-3;
+  for (u64 i = 0; i < 100; i++) {
+    // Generate a value that occupies all bytes of the variable.
+    u64 v = i;
+    v |= v << 8;
+    v |= v << 16;
+    v |= v << 32;
+    val.a.val_dont_use = (Type)v;
+    EXPECT_EQ(atomic_load(&val.a, load_mo), (Type)v);
+    val.a.val_dont_use = (Type)-1;
+    atomic_store(&val.a, (Type)v, store_mo);
+    EXPECT_EQ(val.a.val_dont_use, (Type)v);
+  }
+  EXPECT_EQ(val.magic0, (Type)-3);
+  EXPECT_EQ(val.magic1, (Type)-3);
+}
+
+TEST(SanitizerCommon, AtomicStoreLoad) {
+  CheckStoreLoad<atomic_uint8_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint8_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uint16_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint16_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uint32_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint32_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uint64_t, memory_order_relaxed, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_consume, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_acquire, memory_order_relaxed>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_relaxed, memory_order_release>();
+  CheckStoreLoad<atomic_uint64_t, memory_order_seq_cst, memory_order_seq_cst>();
+
+  CheckStoreLoad<atomic_uintptr_t, memory_order_relaxed, memory_order_relaxed>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_consume, memory_order_relaxed>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_acquire, memory_order_relaxed>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_relaxed, memory_order_release>
+      ();
+  CheckStoreLoad<atomic_uintptr_t, memory_order_seq_cst, memory_order_seq_cst>
+      ();
+}
+
 // Clang crashes while compiling this test for Android:
 // http://llvm.org/bugs/show_bug.cgi?id=15587
 #if !SANITIZER_ANDROID
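
Note (not part of the patch): the atomic_uint64_s / ALIGNED(8) typedef exists because the 32-bit x86 psABI only requires 4-byte alignment for a u64 struct member, while both 64-bit paths above DCHECK natural alignment and rely on it for atomicity. A minimal standalone sketch of that layout difference, assuming a 32-bit x86 build (-m32) and C++11; the struct names below are made up for the example and are not part of the runtime:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical structs, only to show member alignment under -m32.
struct U64Unaligned { uint32_t pad; uint64_t v; };             // v usually lands at offset 4
struct U64Aligned   { uint32_t pad; alignas(8) uint64_t v; };  // v forced to an 8-byte boundary

int main() {
  std::printf("unaligned member offset: %zu\n", offsetof(U64Unaligned, v));  // typically 4
  std::printf("aligned member offset:   %zu\n", offsetof(U64Aligned, v));    // 8
  return 0;
}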
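
Note (not part of the patch): the 64-bit store fallback in sanitizer_atomic_clang_other.h is a compare-and-swap retry loop. A minimal standalone sketch of the same idea, assuming a GCC/Clang toolchain with the __sync builtins and a target that provides a 64-bit CAS (cmpxchg8b, ldrexd/strexd, and so on); store64_via_cas is an illustrative name, not part of the runtime:

#include <stdint.h>

// Unconditionally store v into *p using CAS retries.
// __sync_val_compare_and_swap returns the value *p held before the call,
// so the loop exits exactly when that value matches the expected one,
// i.e. when our swap actually took effect.
static void store64_via_cas(volatile uint64_t *p, uint64_t v) {
  uint64_t cmp = *p;  // best-effort snapshot of the current value
  for (;;) {
    uint64_t cur = __sync_val_compare_and_swap(p, cmp, v);
    if (cur == cmp)   // CAS succeeded: *p now holds v
      break;
    cmp = cur;        // lost a race: retry against the fresh value
  }
}

int main() {
  volatile uint64_t x = 0;
  store64_via_cas(&x, 0x1122334455667788ULL);
  return x == 0x1122334455667788ULL ? 0 : 1;
}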