diff --git a/compiler-rt/lib/tsan/CMakeLists.txt b/compiler-rt/lib/tsan/CMakeLists.txt
--- a/compiler-rt/lib/tsan/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/CMakeLists.txt
@@ -51,6 +51,7 @@
   rtl/tsan_suppressions.cpp
   rtl/tsan_symbolize.cpp
   rtl/tsan_sync.cpp
+  rtl/tsan_vector_clock.cpp
   )

set(TSAN_CXX_SOURCES
@@ -105,6 +106,7 @@
   rtl/tsan_sync.h
   rtl/tsan_trace.h
   rtl/tsan_update_shadow_word_inl.h
+  rtl/tsan_vector_clock.h
   )

set(TSAN_RUNTIME_LIBRARIES)
diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h
--- a/compiler-rt/lib/tsan/rtl/tsan_defs.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -18,6 +18,24 @@
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "ubsan/ubsan_platform.h"
 
+#ifndef TSAN_VECTORIZE
+# define TSAN_VECTORIZE __SSE4_2__
+#endif
+
+#if TSAN_VECTORIZE
+// <emmintrin.h> transitively includes <stdlib.h>,
+// and it's prohibited to include std headers into tsan runtime.
+// So we do this dirty trick.
+# define _MM_MALLOC_H_INCLUDED
+# define __MM_MALLOC_H
+# include <emmintrin.h>
+# include <smmintrin.h>
+# define VECTOR_ALIGNED ALIGNED(16)
+typedef __m128i m128;
+#else
+# define VECTOR_ALIGNED
+#endif
+
 // Setup defaults for compile definitions.
 #ifndef TSAN_NO_HISTORY
 # define TSAN_NO_HISTORY 0
@@ -33,6 +51,14 @@
 
 namespace __tsan {
 
+// Thread slot ID.
+enum class Sid : u8 {};
+constexpr uptr kThreadSlotCount = 256;
+
+// Abstract time unit, vector clock element.
+enum class Epoch : u16 {};
+constexpr Epoch kEpochZero = static_cast<Epoch>(0);
+
 const int kClkBits = 42;
 const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -28,16 +28,6 @@
 #include "tsan_symbolize.h"
 #include "ubsan/ubsan_init.h"
 
-#ifdef __SSE3__
-// <emmintrin.h> transitively includes <stdlib.h>,
-// and it's prohibited to include std headers into tsan runtime.
-// So we do this dirty trick.
-#define _MM_MALLOC_H_INCLUDED
-#define __MM_MALLOC_H
-#include <emmintrin.h>
-typedef __m128i m128;
-#endif
-
 volatile int __tsan_resumed = 0;
 
 extern "C" void __tsan_resume() {
@@ -779,10 +769,11 @@
   return false;
 }
 
-#if defined(__SSE3__)
-#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
-    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
-    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
+#if TSAN_VECTORIZE
+# define SHUF(v0, v1, i0, i1, i2, i3)                     \
+    _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
+                                    _mm_castsi128_ps(v1), \
+                                    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
 ALWAYS_INLINE
 bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
   // This is an optimized version of ContainsSameAccessSlow.
@@ -839,7 +830,7 @@
 ALWAYS_INLINE
 bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
-#if defined(__SSE3__)
+#if TSAN_VECTORIZE
   bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
   // NOTE: this check can fail if the shadow is concurrently mutated
   // by other threads. But it still can be useful if you modify
diff --git a/compiler-rt/lib/tsan/rtl/tsan_vector_clock.h b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.h
@@ -0,0 +1,51 @@
+//===-- tsan_vector_clock.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_VECTOR_CLOCK_H
+#define TSAN_VECTOR_CLOCK_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+// Fixed-size vector clock, used both for threads and sync objects.
+class VectorClock {
+ public:
+  VectorClock();
+
+  Epoch Get(Sid sid) const;
+  void Set(Sid sid, Epoch v);
+
+  void Reset();
+  void Acquire(const VectorClock* src);
+  void Release(VectorClock** dstp) const;
+  void ReleaseStore(VectorClock** dstp) const;
+  void ReleaseStoreAcquire(VectorClock** dstp);
+  void ReleaseAcquire(VectorClock** dstp);
+
+  VectorClock& operator=(const VectorClock& other);
+
+ private:
+  Epoch clk_[kThreadSlotCount] VECTOR_ALIGNED;
+};
+
+ALWAYS_INLINE Epoch VectorClock::Get(Sid sid) const {
+  return clk_[static_cast<u8>(sid)];
+}
+
+ALWAYS_INLINE void VectorClock::Set(Sid sid, Epoch v) {
+  DCHECK_GE(v, clk_[static_cast<u8>(sid)]);
+  clk_[static_cast<u8>(sid)] = v;
+}
+
+}  // namespace __tsan
+
+#endif  // TSAN_VECTOR_CLOCK_H
diff --git a/compiler-rt/lib/tsan/rtl/tsan_vector_clock.cpp b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.cpp
@@ -0,0 +1,123 @@
+//===-- tsan_vector_clock.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_vector_clock.h"
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+#if TSAN_VECTORIZE
+const uptr kVectorClockSize = kThreadSlotCount * sizeof(Epoch) / sizeof(m128);
+#endif
+
+VectorClock::VectorClock() { Reset(); }
+
+void VectorClock::Reset() {
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++) clk_[i] = kEpochZero;
+#else
+  m128 z = _mm_setzero_si128();
+  m128* vclk = reinterpret_cast<m128*>(clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) _mm_store_si128(&vclk[i], z);
+#endif
+}
+
+void VectorClock::Acquire(const VectorClock* src) {
+  if (!src)
+    return;
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    clk_[i] = max(clk_[i], src->clk_[i]);
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
+  m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(src->clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 s = _mm_load_si128(&vsrc[i]);
+    m128 d = _mm_load_si128(&vdst[i]);
+    m128 m = _mm_max_epu16(s, d);
+    _mm_store_si128(&vdst[i], m);
+  }
+#endif
+}
+
+static VectorClock* AllocClock(VectorClock** dstp) {
+  if (UNLIKELY(!*dstp))
+    *dstp = New<VectorClock>();
+  return *dstp;
+}
+
+void VectorClock::Release(VectorClock** dstp) const {
+  VectorClock* dst = AllocClock(dstp);
+  dst->Acquire(this);
+}
+
+void VectorClock::ReleaseStore(VectorClock** dstp) const {
+  VectorClock* dst = AllocClock(dstp);
+  *dst = *this;
+}
+
+VectorClock& VectorClock::operator=(const VectorClock& other) {
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++) clk_[i] = other.clk_[i];
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
+  m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(other.clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 s = _mm_load_si128(&vsrc[i]);
+    _mm_store_si128(&vdst[i], s);
+  }
+#endif
+  return *this;
+}
+
+void VectorClock::ReleaseStoreAcquire(VectorClock** dstp) {
+  VectorClock* dst = AllocClock(dstp);
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    Epoch tmp = dst->clk_[i];
+    dst->clk_[i] = clk_[i];
+    clk_[i] = max(clk_[i], tmp);
+  }
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
+  m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 t = _mm_load_si128(&vdst[i]);
+    m128 c = _mm_load_si128(&vclk[i]);
+    m128 m = _mm_max_epu16(c, t);
+    _mm_store_si128(&vdst[i], c);
+    _mm_store_si128(&vclk[i], m);
+  }
+#endif
+}
+
+void VectorClock::ReleaseAcquire(VectorClock** dstp) {
+  VectorClock* dst = AllocClock(dstp);
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    dst->clk_[i] = max(dst->clk_[i], clk_[i]);
+    clk_[i] = dst->clk_[i];
+  }
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
+  m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 c = _mm_load_si128(&vclk[i]);
+    m128 d = _mm_load_si128(&vdst[i]);
+    m128 m = _mm_max_epu16(c, d);
+    _mm_store_si128(&vdst[i], m);
+    _mm_store_si128(&vclk[i], m);
+  }
+#endif
+}
+
+}  // namespace __tsan
diff --git a/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt b/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt
--- a/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt
@@ -8,6 +8,7 @@
   tsan_stack_test.cpp
   tsan_sync_test.cpp
   tsan_unit_test_main.cpp
+  tsan_vector_clock_test.cpp
   )

add_tsan_unittest(TsanUnitTest
diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_vector_clock_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_vector_clock_test.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/tsan/tests/unit/tsan_vector_clock_test.cpp
@@ -0,0 +1,101 @@
+//===-- tsan_vector_clock_test.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_vector_clock.h"
+
+#include "gtest/gtest.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+TEST(VectorClock, GetSet) {
+  // Compiler won't ensure alignment on stack.
+  VectorClock *vc = New<VectorClock>();
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    ASSERT_EQ(vc->Get(static_cast<Sid>(i)), kEpochZero);
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    vc->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    ASSERT_EQ(vc->Get(static_cast<Sid>(i)), static_cast<Epoch>(i));
+  vc->Reset();
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    ASSERT_EQ(vc->Get(static_cast<Sid>(i)), kEpochZero);
+  DestroyAndFree(vc);
+}
+
+TEST(VectorClock, VectorOps) {
+  VectorClock *vc1 = New<VectorClock>();
+  VectorClock *vc2 = nullptr;
+  VectorClock *vc3 = nullptr;
+
+  vc1->Acquire(vc2);
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)), kEpochZero);
+  vc1->Release(&vc2);
+  EXPECT_NE(vc2, nullptr);
+  vc1->Acquire(vc2);
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)), kEpochZero);
+
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    vc1->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
+    vc2->Set(static_cast<Sid>(i), static_cast<Epoch>(kThreadSlotCount - i));
+  }
+  vc1->Acquire(vc2);
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)),
+              static_cast<Epoch>(i < kThreadSlotCount / 2 ? kThreadSlotCount - i
+                                                          : i));
+    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)),
+              static_cast<Epoch>(kThreadSlotCount - i));
+  }
+  vc2->ReleaseStore(&vc3);
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    ASSERT_EQ(vc3->Get(static_cast<Sid>(i)),
+              static_cast<Epoch>(kThreadSlotCount - i));
+    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)),
+              static_cast<Epoch>(kThreadSlotCount - i));
+  }
+
+  vc1->Reset();
+  vc2->Reset();
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    vc1->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
+    vc2->Set(static_cast<Sid>(i), static_cast<Epoch>(kThreadSlotCount - i));
+  }
+  vc1->ReleaseAcquire(&vc2);
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    Epoch expect =
+        static_cast<Epoch>(i < kThreadSlotCount / 2 ? kThreadSlotCount - i : i);
+    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)), expect);
+    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)), expect);
+  }
+
+  vc1->Reset();
+  vc2->Reset();
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    vc1->Set(static_cast<Sid>(i), static_cast<Epoch>(i));
+    vc2->Set(static_cast<Sid>(i), static_cast<Epoch>(kThreadSlotCount - i));
+  }
+  vc1->ReleaseStoreAcquire(&vc2);
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    ASSERT_EQ(vc1->Get(static_cast<Sid>(i)),
+              static_cast<Epoch>(i < kThreadSlotCount / 2 ? kThreadSlotCount - i
+                                                          : i));
+    ASSERT_EQ(vc2->Get(static_cast<Sid>(i)), static_cast<Epoch>(i));
+  }
+
+  DestroyAndFree(vc1);
+  DestroyAndFree(vc2);
+  DestroyAndFree(vc3);
+}
+
+}  // namespace __tsan