Index: lib/interception/interception.h
===================================================================
--- lib/interception/interception.h
+++ lib/interception/interception.h
@@ -74,6 +74,15 @@
 // we intercept. To resolve this we declare our interceptors with __interceptor_
 // prefix, and then make actual interceptors weak aliases to __interceptor_
 // functions.
+// Another complication is that we may have already included a declaration
+// for an intercepted function from standard headers, and then get a declaration
+// mismatch between the standard and our signatures (e.g. standard declarations
+// can declare some arguments as __restrict). To circumvent this we declare the
+// actual interceptors with the __interceptor_fake_ prefix and then strip the
+// prefix with an asm directive.
+// Another complication is that an intercepted function can also be a compiler
+// builtin (e.g. __atomic_load). Fortunately, the __interceptor_fake_ hack also
+// resolves this problem.
 //
 // This is not so on Mac OS, where the two-level namespace makes
 // our replacement functions invisible to other libraries. This may be overcomed
@@ -143,8 +152,14 @@
 # define WRAP(x) __interceptor_ ## x
 # define WRAPPER_NAME(x) "__interceptor_" #x
 # define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default")))
+# if defined(__APPLE__)
+# define INTERCEPTOR_ASM_PREFIX(x) "_" x
+# else
+# define INTERCEPTOR_ASM_PREFIX(x) x
+# endif
 # define DECLARE_WRAPPER(ret_type, func, ...) \
-    extern "C" ret_type func(__VA_ARGS__) \
+    extern "C" ret_type __interceptor_fake_ ## func(__VA_ARGS__) \
+    __asm(INTERCEPTOR_ASM_PREFIX(#func)) \
     __attribute__((weak, alias("__interceptor_" #func), visibility("default")));
 #endif
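As a side note on the DECLARE_WRAPPER change above, here is a minimal standalone sketch of the same pattern on a Linux/ELF target. It is not part of the patch; `foo` and `__interceptor_foo` are hypothetical names, and on Apple platforms the asm name would need the leading underscore that INTERCEPTOR_ASM_PREFIX supplies.

// Minimal sketch of the asm-rename + weak-alias pattern; hypothetical names.
extern "C" int __interceptor_foo(int x) {  // the actual interceptor body
  return x + 1;
}

// Declared under an unrelated identifier so it cannot clash with a libc
// prototype (e.g. a __restrict-qualified one) or with a compiler builtin.
// The asm label renames the emitted symbol back to "foo", and the weak alias
// makes that symbol resolve to __interceptor_foo.
extern "C" int __interceptor_fake_foo(int x)
    __asm__("foo")
    __attribute__((weak, alias("__interceptor_foo"), visibility("default")));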
Index: lib/interception/interception_linux.h
===================================================================
--- lib/interception/interception_linux.h
+++ lib/interception/interception_linux.h
@@ -31,7 +31,7 @@
 #define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func)                          \
   ::__interception::GetRealFunctionAddress(                                \
       #func, (::__interception::uptr *)&__interception::PTR_TO_REAL(func), \
-      (::__interception::uptr) & (func),                                   \
+      (::__interception::uptr) & (__interceptor_fake_ ## func),            \
       (::__interception::uptr) & WRAP(func))

 #if !defined(__ANDROID__)  // android does not have dlvsym

Index: lib/msan/msan_interceptors.cc
===================================================================
--- lib/msan/msan_interceptors.cc
+++ lib/msan/msan_interceptors.cc
@@ -488,7 +488,7 @@
   ENSURE_MSAN_INITED();
   va_list ap;
   va_start(ap, format);
-  int res = vswprintf(str, size, format, ap);
+  int res = WRAP(vswprintf)(str, size, format, ap);
   va_end(ap);
   return res;
 }

Index: lib/tsan/rtl/tsan_interceptors.cc
===================================================================
--- lib/tsan/rtl/tsan_interceptors.cc
+++ lib/tsan/rtl/tsan_interceptors.cc
@@ -1339,6 +1339,74 @@
   return 0;
 }

+TSAN_INTERCEPTOR(u64, __atomic_load_8, u64 *a, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_load_8, a, ord);
+  return __tsan_atomic64_load(a, (morder)ord);
+}
+
+TSAN_INTERCEPTOR(void, __atomic_store_8, u64 *a, u64 v, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_store_8, a, v, ord);
+  __tsan_atomic64_store(a, v, (morder)ord);
+}
+
+TSAN_INTERCEPTOR(u64, __atomic_exchange_8, u64 *a, u64 v, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_exchange_8, a, v, ord);
+  return __tsan_atomic64_exchange(a, v, (morder)ord);
+}
+
+TSAN_INTERCEPTOR(bool, __atomic_compare_exchange_8, u64 *a, u64 *cmp,
+                 u64 v, int sord, int ford) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_compare_exchange_8, a, cmp, v, sord, ford);
+  return __tsan_atomic64_compare_exchange_strong(a, cmp, v, (morder)sord,
+                                                 (morder)ford);
+}
+
+#if __TSAN_HAS_INT128
+TSAN_INTERCEPTOR(a128, __atomic_load_16, a128 *a, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_load_16, a, ord);
+  return __tsan_atomic128_load(a, (morder)ord);
+}
+
+TSAN_INTERCEPTOR(void, __atomic_store_16, a128 *a, a128 v, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_store_16, a, v, ord);
+  __tsan_atomic128_store(a, v, (morder)ord);
+}
+
+TSAN_INTERCEPTOR(a128, __atomic_exchange_16, a128 *a, a128 v, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_exchange_16, a, v, ord);
+  return __tsan_atomic128_exchange(a, v, (morder)ord);
+}
+
+TSAN_INTERCEPTOR(bool, __atomic_compare_exchange_16, a128 *a, a128 *cmp,
+                 a128 v, int sord, int ford) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_compare_exchange_16, a, cmp, v, sord, ford);
+  return __tsan_atomic128_compare_exchange_strong(a, cmp, v, (morder)sord,
+                                                  (morder)ford);
+}
+#endif
+
+TSAN_INTERCEPTOR(void, __atomic_load, SIZE_T n, void *a, void *v, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_load, n, a, v, ord);
+  AtomicLoad(thr, pc, n, a, v, ord);
+}
+
+TSAN_INTERCEPTOR(void, __atomic_store, SIZE_T n, void *a, void *v, int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_store, n, a, v, ord);
+  AtomicStore(thr, pc, n, a, v, ord);
+}
+
+TSAN_INTERCEPTOR(void, __atomic_exchange, SIZE_T n, void *a, void *v, void *ret,
+                 int ord) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_exchange, n, a, v, ret, ord);
+  AtomicExchange(thr, pc, n, a, v, ret, ord);
+}
+
+TSAN_INTERCEPTOR(bool, __atomic_compare_exchange, SIZE_T n, void *a, void *cmp,
+                 void *v, int sord, int ford) {
+  SCOPED_TSAN_INTERCEPTOR(__atomic_compare_exchange, n, a, cmp, v, sord, ford);
+  return AtomicCompareExchange(thr, pc, n, a, cmp, v, sord, ford);
+}
+
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf);
@@ -1974,7 +2042,7 @@
   internal_memset(&act.sa_mask, -1, sizeof(act.sa_mask));
   act.sa_flags = 0;
   sigaction_t old;
-  int res = sigaction(sig, &act, &old);
+  int res = WRAP(sigaction)(sig, &act, &old);
   if (res)
     return SIG_ERR;
   return old.sa_handler;
@@ -2530,6 +2598,21 @@

   TSAN_INTERCEPT(pthread_once);

+  TSAN_INTERCEPT(__atomic_load_8);
+  TSAN_INTERCEPT(__atomic_store_8);
+  TSAN_INTERCEPT(__atomic_exchange_8);
+  TSAN_INTERCEPT(__atomic_compare_exchange_8);
+#if __TSAN_HAS_INT128
+  TSAN_INTERCEPT(__atomic_load_16);
+  TSAN_INTERCEPT(__atomic_store_16);
+  TSAN_INTERCEPT(__atomic_exchange_16);
+  TSAN_INTERCEPT(__atomic_compare_exchange_16);
+#endif
+  TSAN_INTERCEPT(__atomic_load);
+  TSAN_INTERCEPT(__atomic_store);
+  TSAN_INTERCEPT(__atomic_exchange);
+  TSAN_INTERCEPT(__atomic_compare_exchange);
+
   TSAN_INTERCEPT(fstat);
   TSAN_MAYBE_INTERCEPT___FXSTAT;
   TSAN_MAYBE_INTERCEPT_FSTAT64;

Index: lib/tsan/rtl/tsan_interface_atomic.cc
===================================================================
--- lib/tsan/rtl/tsan_interface_atomic.cc
+++ lib/tsan/rtl/tsan_interface_atomic.cc
@@ -445,6 +445,74 @@
 }
 #endif

+namespace __tsan {
+void AtomicLoad(ThreadState *thr, uptr pc, uptr n, void *a, void *v, int ord) {
+  const morder mo = static_cast<morder>(ord);
+  CHECK(IsLoadOrder(mo));
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, false);
+  if (IsAcquireOrder(mo))
+    AcquireImpl(thr, pc, &s->clock);
+  internal_memcpy(v, a, n);
+  s->mtx.ReadUnlock();
+  MemoryReadAtomic(thr, pc, (uptr)a, kSizeLog1);
+}
+
+void AtomicStore(ThreadState *thr, uptr pc, uptr n, void *a, void *v,
+                 int ord) {
+  const morder mo = static_cast<morder>(ord);
+  CHECK(IsStoreOrder(mo));
+  MemoryWriteAtomic(thr, pc, (uptr)a, kSizeLog1);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+  if (IsReleaseOrder(mo)) {
+    thr->fast_state.IncrementEpoch();
+    // Can't increment epoch w/o writing to the trace as well.
+    TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+    ReleaseImpl(thr, pc, &s->clock);
+  }
+  internal_memcpy(a, v, n);
+  s->mtx.Unlock();
+}
+
+void AtomicExchange(ThreadState *thr, uptr pc, uptr n, void *a, void *v,
+                    void *ret, int ord) {
+  const morder mo = static_cast<morder>(ord);
+  MemoryWriteAtomic(thr, pc, (uptr)a, kSizeLog1);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  if (IsAcqRelOrder(mo))
+    AcquireReleaseImpl(thr, pc, &s->clock);
+  else if (IsReleaseOrder(mo))
+    ReleaseImpl(thr, pc, &s->clock);
+  else if (IsAcquireOrder(mo))
+    AcquireImpl(thr, pc, &s->clock);
+  internal_memcpy(ret, a, n);
+  internal_memcpy(a, v, n);
+  s->mtx.Unlock();
+}
+
+bool AtomicCompareExchange(ThreadState *thr, uptr pc, uptr n, void *a, void *c,
+                           void *v, int sord, int ford) {
+  (void)ford;
+  const morder mo = static_cast<morder>(sord);
+  MemoryWriteAtomic(thr, pc, (uptr)a, kSizeLog1);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  if (IsAcqRelOrder(mo))
+    AcquireReleaseImpl(thr, pc, &s->clock);
+  else if (IsReleaseOrder(mo))
+    ReleaseImpl(thr, pc, &s->clock);
+  else if (IsAcquireOrder(mo))
+    AcquireImpl(thr, pc, &s->clock);
+  bool res = internal_memcmp(a, c, n) == 0;
+  internal_memcpy((res ? a : c), (res ? v : a), n);
+  s->mtx.Unlock();
+  return res;
+}
+}  // namespace __tsan
+
 // Interface functions follow.

 #if !SANITIZER_GO

Index: lib/tsan/rtl/tsan_rtl.h
===================================================================
--- lib/tsan/rtl/tsan_rtl.h
+++ lib/tsan/rtl/tsan_rtl.h
@@ -756,6 +756,14 @@
 void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c);
 void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);

+// Variable-size atomic operations for libatomic interceptors.
+void AtomicLoad(ThreadState *thr, uptr pc, uptr n, void *a, void *v, int ord);
+void AtomicStore(ThreadState *thr, uptr pc, uptr n, void *a, void *v, int ord);
+void AtomicExchange(ThreadState *thr, uptr pc, uptr n, void *a, void *v,
+                    void *ret, int ord);
+bool AtomicCompareExchange(ThreadState *thr, uptr pc, uptr n, void *a, void *c,
+                           void *v, int sord, int ford);
+
 // The hacky call uses custom calling convention and an assembly thunk.
 // It is considerably faster that a normal call for the caller
 // if it is not executed (it is intended for slow paths from hot functions).
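To make the flow above concrete: when the compiler cannot inline an atomic operation (typically because the type is larger than 16 bytes or has an unusual size/alignment), it emits a libatomic call, either a size-specific entry point such as __atomic_load_8 or the generic __atomic_load(size, ptr, ret, order). The size-specific interceptors forward to the existing __tsan_atomic*_* entry points, while the generic ones go through the new variable-size AtomicLoad()/AtomicStore()/AtomicExchange()/AtomicCompareExchange() helpers. Below is a rough illustration of that lowering; it is not part of the patch, `Big` is a hypothetical 32-byte type, and the non-lock-free case needs -latomic at link time.

// Rough sketch of the libcalls the interceptors see; hypothetical names.
#include <stdint.h>

struct Big { char data[32]; };

uint64_t load_small(uint64_t *p) {
  // 8 bytes, lock-free: usually an inline instruction, but the compiler may
  // emit a call to __atomic_load_8(p, __ATOMIC_ACQUIRE) instead, which the
  // size-specific interceptor maps onto __tsan_atomic64_load().
  return __atomic_load_n(p, __ATOMIC_ACQUIRE);
}

void load_big(Big *p, Big *ret) {
  // 32 bytes, not lock-free: lowered to the generic libatomic call
  // __atomic_load(sizeof(Big), p, ret, __ATOMIC_ACQUIRE), which the generic
  // interceptor forwards to __tsan::AtomicLoad().
  __atomic_load(p, ret, __ATOMIC_ACQUIRE);
}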
Index: test/tsan/atomic_test.cc
===================================================================
--- test/tsan/atomic_test.cc
+++ test/tsan/atomic_test.cc
@@ -0,0 +1,245 @@
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE=char && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE=short && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE=int && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE=long && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE="MyStruct<6>" -latomic && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE="MyStruct<8>" -latomic && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE="MyStruct<12>" -latomic && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE="MyStruct<16>" -latomic && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE="MyStruct<24>" -latomic && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE="MyStruct<32>" -latomic && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DTEST_TYPE="MyStruct<128>" -latomic && %deflake %run %t | FileCheck %s
+#include "test.h"
+#include <atomic>
+#include <string.h>
+
+// Test operation and synchronization provided by atomic variables of different
+// sizes, including sizes > 16 which are handled by libatomic.
+// Note that the only allowed atomic operations for these larger types are:
+// load, store, exchange, compare_exchange.
+
+template <int kSize>
+struct MyStruct {
+  char data[kSize];
+
+  explicit MyStruct(char v = 0) noexcept {
+    memset(&data[0], v, sizeof(data));
+  }
+
+  bool operator == (const MyStruct &other) const {
+    return memcmp(&data[0], &other.data[0], sizeof(data)) == 0;
+  }
+
+  bool operator != (const MyStruct &other) const {
+    return !(*this == other);
+  }
+
+  operator int() const {
+    return data[0];
+  }
+};
+
+#ifdef TEST_TYPE
+typedef TEST_TYPE T;
+#else
+typedef long T;
+#endif
+
+struct Data {
+  long pad0;
+  std::atomic<T> a;
+  long pad1;
+  long v;
+};
+
+const int kTestCount = 7;
+Data data[2 * kTestCount];
+
+void Test(int test, Data *p, bool main_thread) {
+  if (test == 0) {
+    // Test that we detect races between atomic and non-atomic accesses.
+    if (main_thread)
+      p->a.store(T(1));
+    else
+      memset(&p->a, 0, 1);
+// CHECK: Test 0 forward
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 0 reverse
+// CHECK: ThreadSanitizer: data race
+
+  } else if (test == 1) {
+    // Normal acquire-release synchronization.
+    if (p->a.load(std::memory_order_acquire) == T(0)) {
+      p->v = 42;
+      p->a.store(T(17), std::memory_order_release);
+    } else {
+      if (p->v != 42) {
+        fprintf(stderr, "%d: bad value %ld\n", __LINE__, p->v);
+        exit(0);
+      }
+      if (p->a.load(std::memory_order_relaxed) != T(17)) {
+        fprintf(stderr, "%d: bad atomic value %d\n", __LINE__,
+                (int)p->a.load());
+        exit(0);
+      }
+    }
+// CHECK: Test 1 forward
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK: Test 1 reverse
+// CHECK-NOT: ThreadSanitizer: data race
+
+  } else if (test == 2) {
+    // The same as the previous case, but with memory_order_relaxed for store.
+    // Ensure that atomics don't over-synchronize (that would happen if e.g. we
+    // would intercept underlying pthread_mutex_t operations in libatomic
+    // emulation).
+    if (p->a.load(std::memory_order_acquire) == T(0)) {
+      p->v = 42;
+      p->a.store(T(17), std::memory_order_relaxed);
+    } else {
+      if (p->v != 42) {
+        fprintf(stderr, "%d: bad value %ld\n", __LINE__, p->v);
+        exit(0);
+      }
+      if (p->a.load(std::memory_order_relaxed) != T(17)) {
+        fprintf(stderr, "%d: bad atomic value %d\n", __LINE__,
+                (int)p->a.load());
+        exit(0);
+      }
+    }
+// CHECK: Test 2 forward
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 2 reverse
+// CHECK: ThreadSanitizer: data race
+
+  } else if (test == 3) {
+    // The same as the previous case, but with memory_order_relaxed for load.
+    if (p->a.load(std::memory_order_relaxed) == T(0)) {
+      p->v = 42;
+      p->a.store(T(17), std::memory_order_release);
+    } else {
+      if (p->v != 42) {
+        fprintf(stderr, "%d: bad value %ld\n", __LINE__, p->v);
+        exit(0);
+      }
+      if (p->a.load(std::memory_order_relaxed) != T(17)) {
+        fprintf(stderr, "%d: bad atomic value %d\n", __LINE__,
+                (int)p->a.load());
+        exit(0);
+      }
+    }
+// CHECK: Test 3 forward
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 3 reverse
+// CHECK: ThreadSanitizer: data race
+
+  } else if (test == 4) {
+    // Acquire-release synchronization, but using exchange/compare_exchange.
+    T cmp(17);
+    if (!p->a.compare_exchange_strong(cmp, T(18), std::memory_order_acquire)) {
+      // libc++ has a bug that causes this check to fail:
+      // https://llvm.org/bugs/show_bug.cgi?id=30675
+      if (false && cmp != T(0)) {
+        fprintf(stderr, "%d: bad atomic value %d\n", __LINE__, (int)cmp);
+        exit(0);
+      }
+      p->v = 42;
+      T old = p->a.exchange(T(17), std::memory_order_release);
+      if (old != T(0)) {
+        fprintf(stderr, "%d: bad atomic value %d\n", __LINE__, (int)old);
+        exit(0);
+      }
+    } else {
+      if (p->v != 42) {
+        fprintf(stderr, "%d: bad value %ld\n", __LINE__, p->v);
+        exit(0);
+      }
+      if (cmp != T(17)) {
+        fprintf(stderr, "%d: bad atomic value %d\n", __LINE__, (int)cmp);
+        exit(0);
+      }
+      if (p->a.load(std::memory_order_relaxed) != T(18)) {
+        fprintf(stderr, "%d: bad atomic value %d\n", __LINE__,
+                (int)p->a.load());
+        exit(0);
+      }
+    }
+// CHECK: Test 4 forward
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK: Test 4 reverse
+// CHECK-NOT: ThreadSanitizer: data race
+
+  } else if (test == 5) {
+    // The same as the previous case, but with memory_order_relaxed for
+    // exchange.
+    T cmp(17);
+    if (!p->a.compare_exchange_strong(cmp, T(18), std::memory_order_acquire)) {
+      p->v = 42;
+      p->a.exchange(T(17), std::memory_order_relaxed);
+    } else {
+      if (p->v != 42) {
+        fprintf(stderr, "%d: bad value %ld\n", __LINE__, p->v);
+        exit(0);
+      }
+    }
+// CHECK: Test 5 forward
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 5 reverse
+// CHECK: ThreadSanitizer: data race
+
+  } else if (test == 6) {
+    // The same as the previous case, but with memory_order_relaxed for
+    // compare_exchange.
+    T cmp(17);
+    if (!p->a.compare_exchange_strong(cmp, T(18), std::memory_order_relaxed)) {
+      p->v = 42;
+      p->a.exchange(T(17), std::memory_order_release);
+    } else {
+      if (p->v != 42) {
+        fprintf(stderr, "%d: bad value %ld\n", __LINE__, p->v);
+        exit(0);
+      }
+    }
+// CHECK: Test 6 forward
+// CHECK: ThreadSanitizer: data race
+// CHECK: Test 6 reverse
+// CHECK: ThreadSanitizer: data race
+  }
+}
+
+void *Thread(void *p) {
+  for (int i = 0; i < kTestCount; i++) {
+    Test(i, &data[i * 2], false);
+    barrier_wait(&barrier);
+    barrier_wait(&barrier);
+    fprintf(stderr, "Test %d reverse\n", i);
+    Test(i, &data[i * 2 + 1], false);
+  }
+  return 0;
+}
+
+int main() {
+  fprintf(stderr, "data size: %zu\n", sizeof(T));
+  for (int i = 0; i < 2 * kTestCount; i++) {
+    data[i].pad0 = 100;
+    data[i].pad1 = 200;
+  }
+  barrier_init(&barrier, 2);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  for (int i = 0; i < kTestCount; i++) {
+    barrier_wait(&barrier);
+    fprintf(stderr, "Test %d forward\n", i);
+    Test(i, &data[2 * i], true);
+    Test(i, &data[2 * i + 1], true);
+    barrier_wait(&barrier);
+  }
+  pthread_join(t, 0);
+  for (int i = 0; i < 2 * kTestCount; i++) {
+    if (data[i].pad0 != 100 || data[i].pad1 != 200) {
+      fprintf(stderr, "padding overwritten %d: %lu/%lu\n",
+              i, data[i].pad0, data[i].pad1);
+// CHECK-NOT: padding overwritten
+    }
+  }
+}
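For completeness, a rough standalone reproducer of what Test 0 exercises for the large types; it is not part of the patch, and the file name, `Big`, and the build command are illustrative only. A 32-byte std::atomic is not lock-free, so its store goes through libatomic's generic __atomic_store, which tsan now intercepts and can order against the racy plain write.

// repro.cc -- hypothetical standalone example, not part of the patch.
// Build sketch: clang++ -fsanitize=thread -O1 repro.cc -latomic
#include <atomic>
#include <pthread.h>
#include <string.h>

struct Big { char data[32]; };  // too large to be lock-free
std::atomic<Big> g;

void *Thread(void *) {
  Big b;
  memset(&b, 1, sizeof(b));
  g.store(b);  // lowered to the generic __atomic_store(32, &g, &b, ...) call
  return nullptr;
}

int main() {
  pthread_t t;
  pthread_create(&t, nullptr, Thread, nullptr);
  memset(&g, 0, 1);  // racy non-atomic write, as in Test 0 above
  pthread_join(t, nullptr);
  return 0;
}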