diff --git a/compiler-rt/lib/tsan/go/build.bat b/compiler-rt/lib/tsan/go/build.bat --- a/compiler-rt/lib/tsan/go/build.bat +++ b/compiler-rt/lib/tsan/go/build.bat @@ -1,7 +1,6 @@ type ^ tsan_go.cpp ^ ..\rtl\tsan_interface_atomic.cpp ^ - ..\rtl\tsan_clock.cpp ^ ..\rtl\tsan_flags.cpp ^ ..\rtl\tsan_md5.cpp ^ ..\rtl\tsan_report.cpp ^ diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh --- a/compiler-rt/lib/tsan/go/buildgo.sh +++ b/compiler-rt/lib/tsan/go/buildgo.sh @@ -4,7 +4,6 @@ SRCS=" tsan_go.cpp - ../rtl/tsan_clock.cpp ../rtl/tsan_external.cpp ../rtl/tsan_flags.cpp ../rtl/tsan_interface_atomic.cpp diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -17,7 +17,6 @@ append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread TSAN_DYNAMIC_LINK_LIBS) set(TSAN_SOURCES - tsan_clock.cpp tsan_debugging.cpp tsan_external.cpp tsan_fd.cpp @@ -77,7 +76,6 @@ endif() set(TSAN_HEADERS - tsan_clock.h tsan_defs.h tsan_dense_alloc.h tsan_fd.h diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.h b/compiler-rt/lib/tsan/rtl/tsan_clock.h deleted file mode 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_clock.h +++ /dev/null @@ -1,293 +0,0 @@ -//===-- tsan_clock.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of ThreadSanitizer (TSan), a race detector. -// -//===----------------------------------------------------------------------===// -#ifndef TSAN_CLOCK_H -#define TSAN_CLOCK_H - -#include "tsan_defs.h" -#include "tsan_dense_alloc.h" - -namespace __tsan { - -typedef DenseSlabAlloc ClockAlloc; -typedef DenseSlabAllocCache ClockCache; - -// The clock that lives in sync variables (mutexes, atomics, etc). -class SyncClock { - public: - SyncClock(); - ~SyncClock(); - - uptr size() const; - - // These are used only in tests. - u64 get(unsigned tid) const; - u64 get_clean(unsigned tid) const; - - void Resize(ClockCache *c, uptr nclk); - void Reset(ClockCache *c); - - void DebugDump(int(*printf)(const char *s, ...)); - - // Clock element iterator. - // Note: it iterates only over the table without regard to dirty entries. - class Iter { - public: - explicit Iter(SyncClock* parent); - Iter& operator++(); - bool operator!=(const Iter& other); - ClockElem &operator*(); - - private: - SyncClock *parent_; - // [pos_, end_) is the current continuous range of clock elements. - ClockElem *pos_; - ClockElem *end_; - int block_; // Current number of second level block. - - NOINLINE void Next(); - }; - - Iter begin(); - Iter end(); - - private: - friend class ThreadClock; - friend class Iter; - static const uptr kDirtyTids = 2; - - struct Dirty { - u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; } - void set_tid(u32 tid) { - tid_ = tid == kInvalidTid ? kShortInvalidTid : tid; - } - u64 epoch : kClkBits; - - private: - // Full kInvalidTid won't fit into Dirty::tid. - static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1; - u64 tid_ : 64 - kClkBits; // kInvalidId if not active - }; - - static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit"); - - unsigned release_store_tid_; - unsigned release_store_reused_; - Dirty dirty_[kDirtyTids]; - // If size_ is 0, tab_ is nullptr. - // If size <= 64 (kClockCount), tab_ contains pointer to an array with - // 64 ClockElem's (ClockBlock::clock). - // Otherwise, tab_ points to an array with up to 127 u32 elements, - // each pointing to the second-level 512b block with 64 ClockElem's. - // Unused space in the first level ClockBlock is used to store additional - // clock elements. - // The last u32 element in the first level ClockBlock is always used as - // reference counter. - // - // See the following scheme for details. - // All memory blocks are 512 bytes (allocated from ClockAlloc). - // Clock (clk) elements are 64 bits. - // Idx and ref are 32 bits. - // - // tab_ - // | - // \/ - // +----------------------------------------------------+ - // | clk128 | clk129 | ...unused... | idx1 | idx0 | ref | - // +----------------------------------------------------+ - // | | - // | \/ - // | +----------------+ - // | | clk0 ... clk63 | - // | +----------------+ - // \/ - // +------------------+ - // | clk64 ... clk127 | - // +------------------+ - // - // Note: dirty entries, if active, always override what's stored in the clock. - ClockBlock *tab_; - u32 tab_idx_; - u16 size_; - u16 blocks_; // Number of second level blocks. - - void Unshare(ClockCache *c); - bool IsShared() const; - bool Cachable() const; - void ResetImpl(); - void FlushDirty(); - uptr capacity() const; - u32 get_block(uptr bi) const; - void append_block(u32 idx); - ClockElem &elem(unsigned tid) const; -}; - -// The clock that lives in threads. -class ThreadClock { - public: - typedef DenseSlabAllocCache Cache; - - explicit ThreadClock(unsigned tid, unsigned reused = 0); - - u64 get(unsigned tid) const; - void set(ClockCache *c, unsigned tid, u64 v); - void set(u64 v); - void tick(); - uptr size() const; - - void acquire(ClockCache *c, SyncClock *src); - void releaseStoreAcquire(ClockCache *c, SyncClock *src); - void release(ClockCache *c, SyncClock *dst); - void acq_rel(ClockCache *c, SyncClock *dst); - void ReleaseStore(ClockCache *c, SyncClock *dst); - void ResetCached(ClockCache *c); - void NoteGlobalAcquire(u64 v); - - void DebugReset(); - void DebugDump(int(*printf)(const char *s, ...)); - - private: - static const uptr kDirtyTids = SyncClock::kDirtyTids; - // Index of the thread associated with he clock ("current thread"). - const unsigned tid_; - const unsigned reused_; // tid_ reuse count. - // Current thread time when it acquired something from other threads. - u64 last_acquire_; - - // Last time another thread has done a global acquire of this thread's clock. - // It helps to avoid problem described in: - // https://github.com/golang/go/issues/39186 - // See test/tsan/java_finalizer2.cpp for a regression test. - // Note the failuire is _extremely_ hard to hit, so if you are trying - // to reproduce it, you may want to run something like: - // $ go get golang.org/x/tools/cmd/stress - // $ stress -p=64 ./a.out - // - // The crux of the problem is roughly as follows. - // A number of O(1) optimizations in the clocks algorithm assume proper - // transitive cumulative propagation of clock values. The AcquireGlobal - // operation may produce an inconsistent non-linearazable view of - // thread clocks. Namely, it may acquire a later value from a thread - // with a higher ID, but fail to acquire an earlier value from a thread - // with a lower ID. If a thread that executed AcquireGlobal then releases - // to a sync clock, it will spoil the sync clock with the inconsistent - // values. If another thread later releases to the sync clock, the optimized - // algorithm may break. - // - // The exact sequence of events that leads to the failure. - // - thread 1 executes AcquireGlobal - // - thread 1 acquires value 1 for thread 2 - // - thread 2 increments clock to 2 - // - thread 2 releases to sync object 1 - // - thread 3 at time 1 - // - thread 3 acquires from sync object 1 - // - thread 3 increments clock to 2 - // - thread 1 acquires value 2 for thread 3 - // - thread 1 releases to sync object 2 - // - sync object 2 clock has 1 for thread 2 and 2 for thread 3 - // - thread 3 releases to sync object 2 - // - thread 3 sees value 2 in the clock for itself - // and decides that it has already released to the clock - // and did not acquire anything from other threads after that - // (the last_acquire_ check in release operation) - // - thread 3 does not update the value for thread 2 in the clock from 1 to 2 - // - thread 4 acquires from sync object 2 - // - thread 4 detects a false race with thread 2 - // as it should have been synchronized with thread 2 up to time 2, - // but because of the broken clock it is now synchronized only up to time 1 - // - // The global_acquire_ value helps to prevent this scenario. - // Namely, thread 3 will not trust any own clock values up to global_acquire_ - // for the purposes of the last_acquire_ optimization. - atomic_uint64_t global_acquire_; - - // Cached SyncClock (without dirty entries and release_store_tid_). - // We reuse it for subsequent store-release operations without intervening - // acquire operations. Since it is shared (and thus constant), clock value - // for the current thread is then stored in dirty entries in the SyncClock. - // We host a reference to the table while it is cached here. - u32 cached_idx_; - u16 cached_size_; - u16 cached_blocks_; - - // Number of active elements in the clk_ table (the rest is zeros). - uptr nclk_; - u64 clk_[kMaxTidInClock]; // Fixed size vector clock. - - bool IsAlreadyAcquired(const SyncClock *src) const; - bool HasAcquiredAfterRelease(const SyncClock *dst) const; - void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const; -}; - -ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const { - DCHECK_LT(tid, kMaxTidInClock); - return clk_[tid]; -} - -ALWAYS_INLINE void ThreadClock::set(u64 v) { - DCHECK_GE(v, clk_[tid_]); - clk_[tid_] = v; -} - -ALWAYS_INLINE void ThreadClock::tick() { - clk_[tid_]++; -} - -ALWAYS_INLINE uptr ThreadClock::size() const { - return nclk_; -} - -ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) { - // Here we rely on the fact that AcquireGlobal is protected by - // ThreadRegistryLock, thus only one thread at a time executes it - // and values passed to this function should not go backwards. - CHECK_LE(atomic_load_relaxed(&global_acquire_), v); - atomic_store_relaxed(&global_acquire_, v); -} - -ALWAYS_INLINE SyncClock::Iter SyncClock::begin() { - return Iter(this); -} - -ALWAYS_INLINE SyncClock::Iter SyncClock::end() { - return Iter(nullptr); -} - -ALWAYS_INLINE uptr SyncClock::size() const { - return size_; -} - -ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent) - : parent_(parent) - , pos_(nullptr) - , end_(nullptr) - , block_(-1) { - if (parent) - Next(); -} - -ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() { - pos_++; - if (UNLIKELY(pos_ >= end_)) - Next(); - return *this; -} - -ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) { - return parent_ != other.parent_; -} - -ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() { - return *pos_; -} -} // namespace __tsan - -#endif // TSAN_CLOCK_H diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.cpp b/compiler-rt/lib/tsan/rtl/tsan_clock.cpp deleted file mode 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_clock.cpp +++ /dev/null @@ -1,625 +0,0 @@ -//===-- tsan_clock.cpp ----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of ThreadSanitizer (TSan), a race detector. -// -//===----------------------------------------------------------------------===// -#include "tsan_clock.h" -#include "tsan_rtl.h" -#include "sanitizer_common/sanitizer_placement_new.h" - -// SyncClock and ThreadClock implement vector clocks for sync variables -// (mutexes, atomic variables, file descriptors, etc) and threads, respectively. -// ThreadClock contains fixed-size vector clock for maximum number of threads. -// SyncClock contains growable vector clock for currently necessary number of -// threads. -// Together they implement very simple model of operations, namely: -// -// void ThreadClock::acquire(const SyncClock *src) { -// for (int i = 0; i < kMaxThreads; i++) -// clock[i] = max(clock[i], src->clock[i]); -// } -// -// void ThreadClock::release(SyncClock *dst) const { -// for (int i = 0; i < kMaxThreads; i++) -// dst->clock[i] = max(dst->clock[i], clock[i]); -// } -// -// void ThreadClock::releaseStoreAcquire(SyncClock *sc) const { -// for (int i = 0; i < kMaxThreads; i++) { -// tmp = clock[i]; -// clock[i] = max(clock[i], sc->clock[i]); -// sc->clock[i] = tmp; -// } -// } -// -// void ThreadClock::ReleaseStore(SyncClock *dst) const { -// for (int i = 0; i < kMaxThreads; i++) -// dst->clock[i] = clock[i]; -// } -// -// void ThreadClock::acq_rel(SyncClock *dst) { -// acquire(dst); -// release(dst); -// } -// -// Conformance to this model is extensively verified in tsan_clock_test.cpp. -// However, the implementation is significantly more complex. The complexity -// allows to implement important classes of use cases in O(1) instead of O(N). -// -// The use cases are: -// 1. Singleton/once atomic that has a single release-store operation followed -// by zillions of acquire-loads (the acquire-load is O(1)). -// 2. Thread-local mutex (both lock and unlock can be O(1)). -// 3. Leaf mutex (unlock is O(1)). -// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)). -// 5. An atomic with a single writer (writes can be O(1)). -// The implementation dynamically adopts to workload. So if an atomic is in -// read-only phase, these reads will be O(1); if it later switches to read/write -// phase, the implementation will correctly handle that by switching to O(N). -// -// Thread-safety note: all const operations on SyncClock's are conducted under -// a shared lock; all non-const operations on SyncClock's are conducted under -// an exclusive lock; ThreadClock's are private to respective threads and so -// do not need any protection. -// -// Description of SyncClock state: -// clk_ - variable size vector clock, low kClkBits hold timestamp, -// the remaining bits hold "acquired" flag (the actual value is thread's -// reused counter); -// if acquired == thr->reused_, then the respective thread has already -// acquired this clock (except possibly for dirty elements). -// dirty_ - holds up to two indices in the vector clock that other threads -// need to acquire regardless of "acquired" flag value; -// release_store_tid_ - denotes that the clock state is a result of -// release-store operation by the thread with release_store_tid_ index. -// release_store_reused_ - reuse count of release_store_tid_. - -namespace __tsan { - -static atomic_uint32_t *ref_ptr(ClockBlock *cb) { - return reinterpret_cast(&cb->table[ClockBlock::kRefIdx]); -} - -// Drop reference to the first level block idx. -static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) { - ClockBlock *cb = ctx->clock_alloc.Map(idx); - atomic_uint32_t *ref = ref_ptr(cb); - u32 v = atomic_load(ref, memory_order_acquire); - for (;;) { - CHECK_GT(v, 0); - if (v == 1) - break; - if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel)) - return; - } - // First level block owns second level blocks, so them as well. - for (uptr i = 0; i < blocks; i++) - ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]); - ctx->clock_alloc.Free(c, idx); -} - -ThreadClock::ThreadClock(unsigned tid, unsigned reused) - : tid_(tid) - , reused_(reused + 1) // 0 has special meaning - , last_acquire_() - , global_acquire_() - , cached_idx_() - , cached_size_() - , cached_blocks_() { - CHECK_LT(tid, kMaxTidInClock); - CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits); - nclk_ = tid_ + 1; - internal_memset(clk_, 0, sizeof(clk_)); -} - -void ThreadClock::ResetCached(ClockCache *c) { - if (cached_idx_) { - UnrefClockBlock(c, cached_idx_, cached_blocks_); - cached_idx_ = 0; - cached_size_ = 0; - cached_blocks_ = 0; - } -} - -void ThreadClock::acquire(ClockCache *c, SyncClock *src) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(src->size_, kMaxTid); - - // Check if it's empty -> no need to do anything. - const uptr nclk = src->size_; - if (nclk == 0) - return; - - bool acquired = false; - for (unsigned i = 0; i < kDirtyTids; i++) { - SyncClock::Dirty dirty = src->dirty_[i]; - unsigned tid = dirty.tid(); - if (tid != kInvalidTid) { - if (clk_[tid] < dirty.epoch) { - clk_[tid] = dirty.epoch; - acquired = true; - } - } - } - - // Check if we've already acquired src after the last release operation on src - if (tid_ >= nclk || src->elem(tid_).reused != reused_) { - // O(N) acquire. - nclk_ = max(nclk_, nclk); - u64 *dst_pos = &clk_[0]; - for (ClockElem &src_elem : *src) { - u64 epoch = src_elem.epoch; - if (*dst_pos < epoch) { - *dst_pos = epoch; - acquired = true; - } - dst_pos++; - } - - // Remember that this thread has acquired this clock. - if (nclk > tid_) - src->elem(tid_).reused = reused_; - } - - if (acquired) { - last_acquire_ = clk_[tid_]; - ResetCached(c); - } -} - -void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(sc->size_, kMaxTid); - - if (sc->size_ == 0) { - // ReleaseStore will correctly set release_store_tid_, - // which can be important for future operations. - ReleaseStore(c, sc); - return; - } - - nclk_ = max(nclk_, (uptr) sc->size_); - - // Check if we need to resize sc. - if (sc->size_ < nclk_) - sc->Resize(c, nclk_); - - bool acquired = false; - - sc->Unshare(c); - // Update sc->clk_. - sc->FlushDirty(); - uptr i = 0; - for (ClockElem &ce : *sc) { - u64 tmp = clk_[i]; - if (clk_[i] < ce.epoch) { - clk_[i] = ce.epoch; - acquired = true; - } - ce.epoch = tmp; - ce.reused = 0; - i++; - } - sc->release_store_tid_ = kInvalidTid; - sc->release_store_reused_ = 0; - - if (acquired) { - last_acquire_ = clk_[tid_]; - ResetCached(c); - } -} - -void ThreadClock::release(ClockCache *c, SyncClock *dst) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(dst->size_, kMaxTid); - - if (dst->size_ == 0) { - // ReleaseStore will correctly set release_store_tid_, - // which can be important for future operations. - ReleaseStore(c, dst); - return; - } - - // Check if we need to resize dst. - if (dst->size_ < nclk_) - dst->Resize(c, nclk_); - - // Check if we had not acquired anything from other threads - // since the last release on dst. If so, we need to update - // only dst->elem(tid_). - if (!HasAcquiredAfterRelease(dst)) { - UpdateCurrentThread(c, dst); - if (dst->release_store_tid_ != tid_ || - dst->release_store_reused_ != reused_) - dst->release_store_tid_ = kInvalidTid; - return; - } - - // O(N) release. - dst->Unshare(c); - // First, remember whether we've acquired dst. - bool acquired = IsAlreadyAcquired(dst); - // Update dst->clk_. - dst->FlushDirty(); - uptr i = 0; - for (ClockElem &ce : *dst) { - ce.epoch = max(ce.epoch, clk_[i]); - ce.reused = 0; - i++; - } - // Clear 'acquired' flag in the remaining elements. - dst->release_store_tid_ = kInvalidTid; - dst->release_store_reused_ = 0; - // If we've acquired dst, remember this fact, - // so that we don't need to acquire it on next acquire. - if (acquired) - dst->elem(tid_).reused = reused_; -} - -void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) { - DCHECK_LE(nclk_, kMaxTid); - DCHECK_LE(dst->size_, kMaxTid); - - if (dst->size_ == 0 && cached_idx_ != 0) { - // Reuse the cached clock. - // Note: we could reuse/cache the cached clock in more cases: - // we could update the existing clock and cache it, or replace it with the - // currently cached clock and release the old one. And for a shared - // existing clock, we could replace it with the currently cached; - // or unshare, update and cache. But, for simplicity, we currently reuse - // cached clock only when the target clock is empty. - dst->tab_ = ctx->clock_alloc.Map(cached_idx_); - dst->tab_idx_ = cached_idx_; - dst->size_ = cached_size_; - dst->blocks_ = cached_blocks_; - CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid); - // The cached clock is shared (immutable), - // so this is where we store the current clock. - dst->dirty_[0].set_tid(tid_); - dst->dirty_[0].epoch = clk_[tid_]; - dst->release_store_tid_ = tid_; - dst->release_store_reused_ = reused_; - // Remember that we don't need to acquire it in future. - dst->elem(tid_).reused = reused_; - // Grab a reference. - atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed); - return; - } - - // Check if we need to resize dst. - if (dst->size_ < nclk_) - dst->Resize(c, nclk_); - - if (dst->release_store_tid_ == tid_ && - dst->release_store_reused_ == reused_ && - !HasAcquiredAfterRelease(dst)) { - UpdateCurrentThread(c, dst); - return; - } - - // O(N) release-store. - dst->Unshare(c); - // Note: dst can be larger than this ThreadClock. - // This is fine since clk_ beyond size is all zeros. - uptr i = 0; - for (ClockElem &ce : *dst) { - ce.epoch = clk_[i]; - ce.reused = 0; - i++; - } - for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid); - dst->release_store_tid_ = tid_; - dst->release_store_reused_ = reused_; - // Remember that we don't need to acquire it in future. - dst->elem(tid_).reused = reused_; - - // If the resulting clock is cachable, cache it for future release operations. - // The clock is always cachable if we released to an empty sync object. - if (cached_idx_ == 0 && dst->Cachable()) { - // Grab a reference to the ClockBlock. - atomic_uint32_t *ref = ref_ptr(dst->tab_); - if (atomic_load(ref, memory_order_acquire) == 1) - atomic_store_relaxed(ref, 2); - else - atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed); - cached_idx_ = dst->tab_idx_; - cached_size_ = dst->size_; - cached_blocks_ = dst->blocks_; - } -} - -void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) { - acquire(c, dst); - ReleaseStore(c, dst); -} - -// Updates only single element related to the current thread in dst->clk_. -void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const { - // Update the threads time, but preserve 'acquired' flag. - for (unsigned i = 0; i < kDirtyTids; i++) { - SyncClock::Dirty *dirty = &dst->dirty_[i]; - const unsigned tid = dirty->tid(); - if (tid == tid_ || tid == kInvalidTid) { - dirty->set_tid(tid_); - dirty->epoch = clk_[tid_]; - return; - } - } - // Reset all 'acquired' flags, O(N). - // We are going to touch dst elements, so we need to unshare it. - dst->Unshare(c); - dst->elem(tid_).epoch = clk_[tid_]; - for (uptr i = 0; i < dst->size_; i++) - dst->elem(i).reused = 0; - dst->FlushDirty(); -} - -// Checks whether the current thread has already acquired src. -bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const { - if (src->elem(tid_).reused != reused_) - return false; - for (unsigned i = 0; i < kDirtyTids; i++) { - SyncClock::Dirty dirty = src->dirty_[i]; - if (dirty.tid() != kInvalidTid) { - if (clk_[dirty.tid()] < dirty.epoch) - return false; - } - } - return true; -} - -// Checks whether the current thread has acquired anything -// from other clocks after releasing to dst (directly or indirectly). -bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const { - const u64 my_epoch = dst->elem(tid_).epoch; - return my_epoch <= last_acquire_ || - my_epoch <= atomic_load_relaxed(&global_acquire_); -} - -// Sets a single element in the vector clock. -// This function is called only from weird places like AcquireGlobal. -void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) { - DCHECK_LT(tid, kMaxTid); - DCHECK_GE(v, clk_[tid]); - clk_[tid] = v; - if (nclk_ <= tid) - nclk_ = tid + 1; - last_acquire_ = clk_[tid_]; - ResetCached(c); -} - -void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) { - printf("clock=["); - for (uptr i = 0; i < nclk_; i++) - printf("%s%llu", i == 0 ? "" : ",", clk_[i]); - printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_); -} - -SyncClock::SyncClock() { - ResetImpl(); -} - -SyncClock::~SyncClock() { - // Reset must be called before dtor. - CHECK_EQ(size_, 0); - CHECK_EQ(blocks_, 0); - CHECK_EQ(tab_, 0); - CHECK_EQ(tab_idx_, 0); -} - -void SyncClock::Reset(ClockCache *c) { - if (size_) - UnrefClockBlock(c, tab_idx_, blocks_); - ResetImpl(); -} - -void SyncClock::ResetImpl() { - tab_ = 0; - tab_idx_ = 0; - size_ = 0; - blocks_ = 0; - release_store_tid_ = kInvalidTid; - release_store_reused_ = 0; - for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid); -} - -void SyncClock::Resize(ClockCache *c, uptr nclk) { - Unshare(c); - if (nclk <= capacity()) { - // Memory is already allocated, just increase the size. - size_ = nclk; - return; - } - if (size_ == 0) { - // Grow from 0 to one-level table. - CHECK_EQ(size_, 0); - CHECK_EQ(blocks_, 0); - CHECK_EQ(tab_, 0); - CHECK_EQ(tab_idx_, 0); - tab_idx_ = ctx->clock_alloc.Alloc(c); - tab_ = ctx->clock_alloc.Map(tab_idx_); - internal_memset(tab_, 0, sizeof(*tab_)); - atomic_store_relaxed(ref_ptr(tab_), 1); - size_ = 1; - } else if (size_ > blocks_ * ClockBlock::kClockCount) { - u32 idx = ctx->clock_alloc.Alloc(c); - ClockBlock *new_cb = ctx->clock_alloc.Map(idx); - uptr top = size_ - blocks_ * ClockBlock::kClockCount; - CHECK_LT(top, ClockBlock::kClockCount); - const uptr move = top * sizeof(tab_->clock[0]); - internal_memcpy(&new_cb->clock[0], tab_->clock, move); - internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move); - internal_memset(tab_->clock, 0, move); - append_block(idx); - } - // At this point we have first level table allocated and all clock elements - // are evacuated from it to a second level block. - // Add second level tables as necessary. - while (nclk > capacity()) { - u32 idx = ctx->clock_alloc.Alloc(c); - ClockBlock *cb = ctx->clock_alloc.Map(idx); - internal_memset(cb, 0, sizeof(*cb)); - append_block(idx); - } - size_ = nclk; -} - -// Flushes all dirty elements into the main clock array. -void SyncClock::FlushDirty() { - for (unsigned i = 0; i < kDirtyTids; i++) { - Dirty *dirty = &dirty_[i]; - if (dirty->tid() != kInvalidTid) { - CHECK_LT(dirty->tid(), size_); - elem(dirty->tid()).epoch = dirty->epoch; - dirty->set_tid(kInvalidTid); - } - } -} - -bool SyncClock::IsShared() const { - if (size_ == 0) - return false; - atomic_uint32_t *ref = ref_ptr(tab_); - u32 v = atomic_load(ref, memory_order_acquire); - CHECK_GT(v, 0); - return v > 1; -} - -// Unshares the current clock if it's shared. -// Shared clocks are immutable, so they need to be unshared before any updates. -// Note: this does not apply to dirty entries as they are not shared. -void SyncClock::Unshare(ClockCache *c) { - if (!IsShared()) - return; - // First, copy current state into old. - SyncClock old; - old.tab_ = tab_; - old.tab_idx_ = tab_idx_; - old.size_ = size_; - old.blocks_ = blocks_; - old.release_store_tid_ = release_store_tid_; - old.release_store_reused_ = release_store_reused_; - for (unsigned i = 0; i < kDirtyTids; i++) - old.dirty_[i] = dirty_[i]; - // Then, clear current object. - ResetImpl(); - // Allocate brand new clock in the current object. - Resize(c, old.size_); - // Now copy state back into this object. - Iter old_iter(&old); - for (ClockElem &ce : *this) { - ce = *old_iter; - ++old_iter; - } - release_store_tid_ = old.release_store_tid_; - release_store_reused_ = old.release_store_reused_; - for (unsigned i = 0; i < kDirtyTids; i++) - dirty_[i] = old.dirty_[i]; - // Drop reference to old and delete if necessary. - old.Reset(c); -} - -// Can we cache this clock for future release operations? -ALWAYS_INLINE bool SyncClock::Cachable() const { - if (size_ == 0) - return false; - for (unsigned i = 0; i < kDirtyTids; i++) { - if (dirty_[i].tid() != kInvalidTid) - return false; - } - return atomic_load_relaxed(ref_ptr(tab_)) == 1; -} - -// elem linearizes the two-level structure into linear array. -// Note: this is used only for one time accesses, vector operations use -// the iterator as it is much faster. -ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const { - DCHECK_LT(tid, size_); - const uptr block = tid / ClockBlock::kClockCount; - DCHECK_LE(block, blocks_); - tid %= ClockBlock::kClockCount; - if (block == blocks_) - return tab_->clock[tid]; - u32 idx = get_block(block); - ClockBlock *cb = ctx->clock_alloc.Map(idx); - return cb->clock[tid]; -} - -ALWAYS_INLINE uptr SyncClock::capacity() const { - if (size_ == 0) - return 0; - uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]); - // How many clock elements we can fit into the first level block. - // +1 for ref counter. - uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio; - return blocks_ * ClockBlock::kClockCount + top; -} - -ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const { - DCHECK(size_); - DCHECK_LT(bi, blocks_); - return tab_->table[ClockBlock::kBlockIdx - bi]; -} - -ALWAYS_INLINE void SyncClock::append_block(u32 idx) { - uptr bi = blocks_++; - CHECK_EQ(get_block(bi), 0); - tab_->table[ClockBlock::kBlockIdx - bi] = idx; -} - -// Used only by tests. -u64 SyncClock::get(unsigned tid) const { - for (unsigned i = 0; i < kDirtyTids; i++) { - Dirty dirty = dirty_[i]; - if (dirty.tid() == tid) - return dirty.epoch; - } - return elem(tid).epoch; -} - -// Used only by Iter test. -u64 SyncClock::get_clean(unsigned tid) const { - return elem(tid).epoch; -} - -void SyncClock::DebugDump(int(*printf)(const char *s, ...)) { - printf("clock=["); - for (uptr i = 0; i < size_; i++) - printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch); - printf("] reused=["); - for (uptr i = 0; i < size_; i++) - printf("%s%llu", i == 0 ? "" : ",", elem(i).reused); - printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]", - release_store_tid_, release_store_reused_, dirty_[0].tid(), - dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch); -} - -void SyncClock::Iter::Next() { - // Finished with the current block, move on to the next one. - block_++; - if (block_ < parent_->blocks_) { - // Iterate over the next second level block. - u32 idx = parent_->get_block(block_); - ClockBlock *cb = ctx->clock_alloc.Map(idx); - pos_ = &cb->clock[0]; - end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount, - ClockBlock::kClockCount); - return; - } - if (block_ == parent_->blocks_ && - parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) { - // Iterate over elements in the first level block. - pos_ = &parent_->tab_->clock[0]; - end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount, - ClockBlock::kClockCount); - return; - } - parent_ = nullptr; // denotes end -} -} // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h --- a/compiler-rt/lib/tsan/rtl/tsan_defs.h +++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h @@ -71,40 +71,6 @@ inline bool EpochOverflow(Epoch epoch) { return epoch == kEpochOver; } -const int kClkBits = 42; -const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1; - -struct ClockElem { - u64 epoch : kClkBits; - u64 reused : 64 - kClkBits; // tid reuse count -}; - -struct ClockBlock { - static const uptr kSize = 512; - static const uptr kTableSize = kSize / sizeof(u32); - static const uptr kClockCount = kSize / sizeof(ClockElem); - static const uptr kRefIdx = kTableSize - 1; - static const uptr kBlockIdx = kTableSize - 2; - - union { - u32 table[kTableSize]; - ClockElem clock[kClockCount]; - }; - - ClockBlock() { - } -}; - -const int kTidBits = 13; -// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is -// occupied by reference counter, so total number of elements we can store -// in SyncClock is kClockCount * (kTableSize - 1). -const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount; -#if !SANITIZER_GO -const unsigned kMaxTidInClock = kMaxTid * 2; // This includes msb 'freed' bit. -#else -const unsigned kMaxTidInClock = kMaxTid; // Go does not track freed memory. -#endif const uptr kShadowStackSize = 64 * 1024; // Count of shadow values in a shadow cell. diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -34,7 +34,6 @@ #include "sanitizer_common/sanitizer_suppressions.h" #include "sanitizer_common/sanitizer_thread_registry.h" #include "sanitizer_common/sanitizer_vector.h" -#include "tsan_clock.h" #include "tsan_defs.h" #include "tsan_flags.h" #include "tsan_ignoreset.h" @@ -323,8 +322,6 @@ InternalMmapVector fired_suppressions; DDetector *dd; - ClockAlloc clock_alloc; - Flags flags; fd_t memprof_fd; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -371,7 +371,6 @@ racy_stacks(), racy_addresses(), fired_suppressions_mtx(MutexTypeFired), - clock_alloc(LINKER_INITIALIZED, "clock allocator"), slot_mtx(MutexTypeSlots), resetting() { fired_suppressions.reserve(8); diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.h b/compiler-rt/lib/tsan/rtl/tsan_sync.h --- a/compiler-rt/lib/tsan/rtl/tsan_sync.h +++ b/compiler-rt/lib/tsan/rtl/tsan_sync.h @@ -15,7 +15,6 @@ #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_deadlock_detector_interface.h" -#include "tsan_clock.h" #include "tsan_defs.h" #include "tsan_dense_alloc.h" #include "tsan_shadow.h" diff --git a/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt b/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt --- a/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt +++ b/compiler-rt/lib/tsan/tests/unit/CMakeLists.txt @@ -1,5 +1,4 @@ set(TSAN_UNIT_TEST_SOURCES - tsan_clock_test.cpp tsan_dense_alloc_test.cpp tsan_flags_test.cpp tsan_ilist_test.cpp diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cpp deleted file mode 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cpp +++ /dev/null @@ -1,536 +0,0 @@ -//===-- tsan_clock_test.cpp -----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of ThreadSanitizer (TSan), a race detector. -// -//===----------------------------------------------------------------------===// -#include "tsan_clock.h" -#include "tsan_rtl.h" -#include "gtest/gtest.h" -#include -#include - -namespace __tsan { - -ClockCache cache; - -TEST(Clock, VectorBasic) { - ThreadClock clk(0); - ASSERT_EQ(clk.size(), 1U); - clk.tick(); - ASSERT_EQ(clk.size(), 1U); - ASSERT_EQ(clk.get(0), 1U); - clk.set(&cache, 3, clk.get(3) + 1); - ASSERT_EQ(clk.size(), 4U); - ASSERT_EQ(clk.get(0), 1U); - ASSERT_EQ(clk.get(1), 0U); - ASSERT_EQ(clk.get(2), 0U); - ASSERT_EQ(clk.get(3), 1U); - clk.set(&cache, 3, clk.get(3) + 1); - ASSERT_EQ(clk.get(3), 2U); -} - -TEST(Clock, ChunkedBasic) { - ThreadClock vector(0); - SyncClock chunked; - ASSERT_EQ(vector.size(), 1U); - ASSERT_EQ(chunked.size(), 0U); - vector.acquire(&cache, &chunked); - ASSERT_EQ(vector.size(), 1U); - ASSERT_EQ(chunked.size(), 0U); - vector.release(&cache, &chunked); - ASSERT_EQ(vector.size(), 1U); - ASSERT_EQ(chunked.size(), 1U); - vector.acq_rel(&cache, &chunked); - ASSERT_EQ(vector.size(), 1U); - ASSERT_EQ(chunked.size(), 1U); - chunked.Reset(&cache); -} - -static const uptr interesting_sizes[] = {0, 1, 2, 30, 61, 62, 63, 64, 65, 66, - 100, 124, 125, 126, 127, 128, 129, 130, 188, 189, 190, 191, 192, 193, 254, - 255}; - -TEST(Clock, Iter) { - const uptr n = ARRAY_SIZE(interesting_sizes); - for (uptr fi = 0; fi < n; fi++) { - const uptr size = interesting_sizes[fi]; - SyncClock sync; - ThreadClock vector(0); - for (uptr i = 0; i < size; i++) - vector.set(&cache, i, i + 1); - if (size != 0) - vector.release(&cache, &sync); - uptr i = 0; - for (ClockElem &ce : sync) { - ASSERT_LT(i, size); - ASSERT_EQ(sync.get_clean(i), ce.epoch); - i++; - } - ASSERT_EQ(i, size); - sync.Reset(&cache); - } -} - -TEST(Clock, AcquireRelease) { - ThreadClock vector1(100); - vector1.tick(); - SyncClock chunked; - vector1.release(&cache, &chunked); - ASSERT_EQ(chunked.size(), 101U); - ThreadClock vector2(0); - vector2.acquire(&cache, &chunked); - ASSERT_EQ(vector2.size(), 101U); - ASSERT_EQ(vector2.get(0), 0U); - ASSERT_EQ(vector2.get(1), 0U); - ASSERT_EQ(vector2.get(99), 0U); - ASSERT_EQ(vector2.get(100), 1U); - chunked.Reset(&cache); -} - -TEST(Clock, RepeatedAcquire) { - ThreadClock thr1(1); - thr1.tick(); - ThreadClock thr2(2); - thr2.tick(); - - SyncClock sync; - thr1.ReleaseStore(&cache, &sync); - - thr2.acquire(&cache, &sync); - thr2.acquire(&cache, &sync); - - sync.Reset(&cache); -} - -TEST(Clock, releaseStoreAcquire) { - ThreadClock thr0(0); - thr0.tick(); - ThreadClock thr1(1); - thr1.tick(); - SyncClock syncA; - SyncClock syncB; - ASSERT_EQ(syncA.size(), 0U); - ASSERT_EQ(syncB.size(), 0U); - thr1.releaseStoreAcquire(&cache, &syncB); - ASSERT_EQ(syncB.size(), 2U); // T0 and T1 - // releaseStoreAcquire to an empty SyncClock - thr0.releaseStoreAcquire(&cache, &syncA); - ASSERT_EQ(syncA.size(), 1U); - // releaseStoreAcquire from a non-empty SyncClock - // T0 learns about T1 - thr0.releaseStoreAcquire(&cache, &syncB); - // releaseStoreAcquire to the originally empty SyncClock - // T0 deposits info about T1 into syncA - thr0.releaseStoreAcquire(&cache, &syncA); - ASSERT_EQ(syncA.size(), 2U); - syncA.Reset(&cache); - syncB.Reset(&cache); -} - -TEST(Clock, ManyThreads) { - SyncClock chunked; - for (unsigned i = 0; i < 200; i++) { - ThreadClock vector(0); - vector.tick(); - vector.set(&cache, i, i + 1); - vector.release(&cache, &chunked); - ASSERT_EQ(i + 1, chunked.size()); - vector.acquire(&cache, &chunked); - ASSERT_EQ(i + 1, vector.size()); - } - - for (unsigned i = 0; i < 200; i++) { - printf("i=%d\n", i); - ASSERT_EQ(i + 1, chunked.get(i)); - } - - ThreadClock vector(1); - vector.acquire(&cache, &chunked); - ASSERT_EQ(200U, vector.size()); - for (unsigned i = 0; i < 200; i++) - ASSERT_EQ(i + 1, vector.get(i)); - - chunked.Reset(&cache); -} - -TEST(Clock, DifferentSizes) { - { - ThreadClock vector1(10); - vector1.tick(); - ThreadClock vector2(20); - vector2.tick(); - { - SyncClock chunked; - vector1.release(&cache, &chunked); - ASSERT_EQ(chunked.size(), 11U); - vector2.release(&cache, &chunked); - ASSERT_EQ(chunked.size(), 21U); - chunked.Reset(&cache); - } - { - SyncClock chunked; - vector2.release(&cache, &chunked); - ASSERT_EQ(chunked.size(), 21U); - vector1.release(&cache, &chunked); - ASSERT_EQ(chunked.size(), 21U); - chunked.Reset(&cache); - } - { - SyncClock chunked; - vector1.release(&cache, &chunked); - vector2.acquire(&cache, &chunked); - ASSERT_EQ(vector2.size(), 21U); - chunked.Reset(&cache); - } - { - SyncClock chunked; - vector2.release(&cache, &chunked); - vector1.acquire(&cache, &chunked); - ASSERT_EQ(vector1.size(), 21U); - chunked.Reset(&cache); - } - } -} - -TEST(Clock, Growth) { - { - ThreadClock vector(10); - vector.tick(); - vector.set(&cache, 5, 42); - SyncClock sync; - vector.release(&cache, &sync); - ASSERT_EQ(sync.size(), 11U); - ASSERT_EQ(sync.get(0), 0ULL); - ASSERT_EQ(sync.get(1), 0ULL); - ASSERT_EQ(sync.get(5), 42ULL); - ASSERT_EQ(sync.get(9), 0ULL); - ASSERT_EQ(sync.get(10), 1ULL); - sync.Reset(&cache); - } - { - ThreadClock vector1(10); - vector1.tick(); - ThreadClock vector2(20); - vector2.tick(); - SyncClock sync; - vector1.release(&cache, &sync); - vector2.release(&cache, &sync); - ASSERT_EQ(sync.size(), 21U); - ASSERT_EQ(sync.get(0), 0ULL); - ASSERT_EQ(sync.get(10), 1ULL); - ASSERT_EQ(sync.get(19), 0ULL); - ASSERT_EQ(sync.get(20), 1ULL); - sync.Reset(&cache); - } - { - ThreadClock vector(100); - vector.tick(); - vector.set(&cache, 5, 42); - vector.set(&cache, 90, 84); - SyncClock sync; - vector.release(&cache, &sync); - ASSERT_EQ(sync.size(), 101U); - ASSERT_EQ(sync.get(0), 0ULL); - ASSERT_EQ(sync.get(1), 0ULL); - ASSERT_EQ(sync.get(5), 42ULL); - ASSERT_EQ(sync.get(60), 0ULL); - ASSERT_EQ(sync.get(70), 0ULL); - ASSERT_EQ(sync.get(90), 84ULL); - ASSERT_EQ(sync.get(99), 0ULL); - ASSERT_EQ(sync.get(100), 1ULL); - sync.Reset(&cache); - } - { - ThreadClock vector1(10); - vector1.tick(); - ThreadClock vector2(100); - vector2.tick(); - SyncClock sync; - vector1.release(&cache, &sync); - vector2.release(&cache, &sync); - ASSERT_EQ(sync.size(), 101U); - ASSERT_EQ(sync.get(0), 0ULL); - ASSERT_EQ(sync.get(10), 1ULL); - ASSERT_EQ(sync.get(99), 0ULL); - ASSERT_EQ(sync.get(100), 1ULL); - sync.Reset(&cache); - } -} - -TEST(Clock, Growth2) { - // Test clock growth for every pair of sizes: - const uptr n = ARRAY_SIZE(interesting_sizes); - for (uptr fi = 0; fi < n; fi++) { - for (uptr ti = fi + 1; ti < n; ti++) { - const uptr from = interesting_sizes[fi]; - const uptr to = interesting_sizes[ti]; - SyncClock sync; - ThreadClock vector(0); - for (uptr i = 0; i < from; i++) - vector.set(&cache, i, i + 1); - if (from != 0) - vector.release(&cache, &sync); - ASSERT_EQ(sync.size(), from); - for (uptr i = 0; i < from; i++) - ASSERT_EQ(sync.get(i), i + 1); - for (uptr i = 0; i < to; i++) - vector.set(&cache, i, i + 1); - vector.release(&cache, &sync); - ASSERT_EQ(sync.size(), to); - for (uptr i = 0; i < to; i++) - ASSERT_EQ(sync.get(i), i + 1); - vector.set(&cache, to + 1, to + 1); - vector.release(&cache, &sync); - ASSERT_EQ(sync.size(), to + 2); - for (uptr i = 0; i < to; i++) - ASSERT_EQ(sync.get(i), i + 1); - ASSERT_EQ(sync.get(to), 0U); - ASSERT_EQ(sync.get(to + 1), to + 1); - sync.Reset(&cache); - } - } -} - -const uptr kThreads = 4; -const uptr kClocks = 4; - -// SimpleSyncClock and SimpleThreadClock implement the same thing as -// SyncClock and ThreadClock, but in a very simple way. -struct SimpleSyncClock { - u64 clock[kThreads]; - uptr size; - - SimpleSyncClock() { - Reset(); - } - - void Reset() { - size = 0; - for (uptr i = 0; i < kThreads; i++) - clock[i] = 0; - } - - bool verify(const SyncClock *other) const { - for (uptr i = 0; i < min(size, other->size()); i++) { - if (clock[i] != other->get(i)) - return false; - } - for (uptr i = min(size, other->size()); i < max(size, other->size()); i++) { - if (i < size && clock[i] != 0) - return false; - if (i < other->size() && other->get(i) != 0) - return false; - } - return true; - } -}; - -struct SimpleThreadClock { - u64 clock[kThreads]; - uptr size; - unsigned tid; - - explicit SimpleThreadClock(unsigned tid) { - this->tid = tid; - size = tid + 1; - for (uptr i = 0; i < kThreads; i++) - clock[i] = 0; - } - - void tick() { - clock[tid]++; - } - - void acquire(const SimpleSyncClock *src) { - if (size < src->size) - size = src->size; - for (uptr i = 0; i < kThreads; i++) - clock[i] = max(clock[i], src->clock[i]); - } - - void release(SimpleSyncClock *dst) const { - if (dst->size < size) - dst->size = size; - for (uptr i = 0; i < kThreads; i++) - dst->clock[i] = max(dst->clock[i], clock[i]); - } - - void releaseStoreAcquire(SimpleSyncClock *sc) { - if (sc->size < size) - sc->size = size; - else - size = sc->size; - for (uptr i = 0; i < kThreads; i++) { - uptr tmp = clock[i]; - clock[i] = max(sc->clock[i], clock[i]); - sc->clock[i] = tmp; - } - } - - void acq_rel(SimpleSyncClock *dst) { - acquire(dst); - release(dst); - } - - void ReleaseStore(SimpleSyncClock *dst) const { - if (dst->size < size) - dst->size = size; - for (uptr i = 0; i < kThreads; i++) - dst->clock[i] = clock[i]; - } - - bool verify(const ThreadClock *other) const { - for (uptr i = 0; i < min(size, other->size()); i++) { - if (clock[i] != other->get(i)) - return false; - } - for (uptr i = min(size, other->size()); i < max(size, other->size()); i++) { - if (i < size && clock[i] != 0) - return false; - if (i < other->size() && other->get(i) != 0) - return false; - } - return true; - } -}; - -static bool ClockFuzzer(bool printing) { - // Create kThreads thread clocks. - SimpleThreadClock *thr0[kThreads]; - ThreadClock *thr1[kThreads]; - unsigned reused[kThreads]; - for (unsigned i = 0; i < kThreads; i++) { - reused[i] = 0; - thr0[i] = new SimpleThreadClock(i); - thr1[i] = new ThreadClock(i, reused[i]); - } - - // Create kClocks sync clocks. - SimpleSyncClock *sync0[kClocks]; - SyncClock *sync1[kClocks]; - for (unsigned i = 0; i < kClocks; i++) { - sync0[i] = new SimpleSyncClock(); - sync1[i] = new SyncClock(); - } - - // Do N random operations (acquire, release, etc) and compare results - // for SimpleThread/SyncClock and real Thread/SyncClock. - for (int i = 0; i < 10000; i++) { - unsigned tid = rand() % kThreads; - unsigned cid = rand() % kClocks; - thr0[tid]->tick(); - thr1[tid]->tick(); - - switch (rand() % 7) { - case 0: - if (printing) - printf("acquire thr%d <- clk%d\n", tid, cid); - thr0[tid]->acquire(sync0[cid]); - thr1[tid]->acquire(&cache, sync1[cid]); - break; - case 1: - if (printing) - printf("release thr%d -> clk%d\n", tid, cid); - thr0[tid]->release(sync0[cid]); - thr1[tid]->release(&cache, sync1[cid]); - break; - case 2: - if (printing) - printf("acq_rel thr%d <> clk%d\n", tid, cid); - thr0[tid]->acq_rel(sync0[cid]); - thr1[tid]->acq_rel(&cache, sync1[cid]); - break; - case 3: - if (printing) - printf("rel_str thr%d >> clk%d\n", tid, cid); - thr0[tid]->ReleaseStore(sync0[cid]); - thr1[tid]->ReleaseStore(&cache, sync1[cid]); - break; - case 4: - if (printing) - printf("reset clk%d\n", cid); - sync0[cid]->Reset(); - sync1[cid]->Reset(&cache); - break; - case 5: - if (printing) - printf("releaseStoreAcquire thr%d -> clk%d\n", tid, cid); - thr0[tid]->releaseStoreAcquire(sync0[cid]); - thr1[tid]->releaseStoreAcquire(&cache, sync1[cid]); - break; - case 6: - if (printing) - printf("reset thr%d\n", tid); - u64 epoch = thr0[tid]->clock[tid] + 1; - reused[tid]++; - delete thr0[tid]; - thr0[tid] = new SimpleThreadClock(tid); - thr0[tid]->clock[tid] = epoch; - delete thr1[tid]; - thr1[tid] = new ThreadClock(tid, reused[tid]); - thr1[tid]->set(epoch); - break; - } - - if (printing) { - for (unsigned i = 0; i < kThreads; i++) { - printf("thr%d: ", i); - thr1[i]->DebugDump(printf); - printf("\n"); - } - for (unsigned i = 0; i < kClocks; i++) { - printf("clk%d: ", i); - sync1[i]->DebugDump(printf); - printf("\n"); - } - - printf("\n"); - } - - if (!thr0[tid]->verify(thr1[tid]) || !sync0[cid]->verify(sync1[cid])) { - if (!printing) - return false; - printf("differs with model:\n"); - for (unsigned i = 0; i < kThreads; i++) { - printf("thr%d: clock=[", i); - for (uptr j = 0; j < thr0[i]->size; j++) - printf("%s%llu", j == 0 ? "" : ",", thr0[i]->clock[j]); - printf("]\n"); - } - for (unsigned i = 0; i < kClocks; i++) { - printf("clk%d: clock=[", i); - for (uptr j = 0; j < sync0[i]->size; j++) - printf("%s%llu", j == 0 ? "" : ",", sync0[i]->clock[j]); - printf("]\n"); - } - return false; - } - } - - for (unsigned i = 0; i < kClocks; i++) { - sync1[i]->Reset(&cache); - } - return true; -} - -TEST(Clock, Fuzzer) { - struct timeval tv; - gettimeofday(&tv, NULL); - int seed = tv.tv_sec + tv.tv_usec; - printf("seed=%d\n", seed); - srand(seed); - if (!ClockFuzzer(false)) { - // Redo the test with the same seed, but logging operations. - srand(seed); - ClockFuzzer(true); - ASSERT_TRUE(false); - } -} - -} // namespace __tsan