Index: lib/sanitizer_common/sanitizer_thread_registry.h
===================================================================
--- lib/sanitizer_common/sanitizer_thread_registry.h
+++ lib/sanitizer_common/sanitizer_thread_registry.h
@@ -68,6 +68,7 @@
   virtual void OnStarted(void *arg) {}
   virtual void OnCreated(void *arg) {}
   virtual void OnReset() {}
+  virtual void OnDetached(void *arg) {}
 };
 
 typedef ThreadContextBase* (*ThreadContextFactory)(u32 tid);
@@ -111,6 +112,7 @@
   void SetThreadName(u32 tid, const char *name);
   void SetThreadNameByUserId(uptr user_id, const char *name);
   void DetachThread(u32 tid);
+  void DetachThread(u32 tid, void *arg);
   void JoinThread(u32 tid, void *arg);
   void FinishThread(u32 tid);
   void StartThread(u32 tid, uptr os_id, void *arg);
Index: lib/sanitizer_common/sanitizer_thread_registry.cc
===================================================================
--- lib/sanitizer_common/sanitizer_thread_registry.cc
+++ lib/sanitizer_common/sanitizer_thread_registry.cc
@@ -219,6 +219,10 @@
 }
 
 void ThreadRegistry::DetachThread(u32 tid) {
+  DetachThread(tid, 0);
+}
+
+void ThreadRegistry::DetachThread(u32 tid, void *arg) {
   BlockingMutexLock l(&mtx_);
   CHECK_LT(tid, n_contexts_);
   ThreadContextBase *tctx = threads_[tid];
@@ -227,6 +231,7 @@
     Report("%s: Detach of non-existent thread\n", SanitizerToolName);
     return;
   }
+  tctx->OnDetached(arg);
   if (tctx->status == ThreadStatusFinished) {
     tctx->SetDead();
     QuarantinePush(tctx);
Index: lib/tsan/rtl/tsan_clock.h
===================================================================
--- lib/tsan/rtl/tsan_clock.h
+++ lib/tsan/rtl/tsan_clock.h
@@ -14,7 +14,7 @@
 #define TSAN_CLOCK_H
 
 #include "tsan_defs.h"
-#include "tsan_vector.h"
+#include "tsan_dense_alloc.h"
 
 namespace __tsan {
 
@@ -23,37 +23,64 @@
   u64 reused : 64 - kClkBits;
 };
 
+struct ClockBlock {
+  static const uptr kSize = 512;
+  static const uptr kTableSize = kSize / sizeof(u32);
+  static const uptr kClockCount = kSize / sizeof(ClockElem);
+
+  union {
+    u32 table[kTableSize];
+    ClockElem clock[kClockCount];
+  };
+
+  ClockBlock() {
+  }
+};
+
+typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;
+typedef DenseSlabAllocCache ClockCache;
+
 // The clock that lives in sync variables (mutexes, atomics, etc).
 class SyncClock {
  public:
   SyncClock();
+  ~SyncClock();
 
   uptr size() const {
-    return clk_.Size();
+    return size_;
   }
 
   u64 get(unsigned tid) const {
-    DCHECK_LT(tid, clk_.Size());
-    return clk_[tid].epoch;
+    return elem(tid).epoch;
   }
 
-  void Reset();
-  void Zero();
+  void Reset(ClockCache *c);
 
   void DebugDump(int(*printf)(const char *s, ...));
 
  private:
+  friend struct ThreadClock;
+  static const uptr kDirtyTids = 2;
+
   unsigned release_store_tid_;
   unsigned release_store_reused_;
-  static const uptr kDirtyTids = 2;
   unsigned dirty_tids_[kDirtyTids];
-  mutable Vector<ClockElem> clk_;
-  friend struct ThreadClock;
+  // tab_ contains indirect pointer to a 512b block using DenseSlabAlloc.
+  // If size_ <= 64, then tab_ points to an array with 64 ClockElem's.
+  // Otherwise, tab_ points to an array with 128 u32 elements,
+  // each pointing to the second-level 512b block with 64 ClockElem's.
+  ClockBlock *tab_;
+  u32 tab_idx_;
+  u32 size_;
+
+  ClockElem &elem(unsigned tid) const;
 };
 
 // The clock that lives in threads.
 struct ThreadClock {
  public:
+  typedef DenseSlabAllocCache Cache;
+
   explicit ThreadClock(unsigned tid, unsigned reused = 0);
 
   u64 get(unsigned tid) const {
@@ -76,10 +103,10 @@
     return nclk_;
   }
 
-  void acquire(const SyncClock *src);
-  void release(SyncClock *dst) const;
-  void acq_rel(SyncClock *dst);
-  void ReleaseStore(SyncClock *dst) const;
+  void acquire(ClockCache *c, const SyncClock *src);
+  void release(ClockCache *c, SyncClock *dst) const;
+  void acq_rel(ClockCache *c, SyncClock *dst);
+  void ReleaseStore(ClockCache *c, SyncClock *dst) const;
 
   void DebugReset();
   void DebugDump(int(*printf)(const char *s, ...));
@@ -94,6 +121,7 @@
 
   bool IsAlreadyAcquired(const SyncClock *src) const;
   void UpdateCurrentThread(SyncClock *dst) const;
+  void Resize(ClockCache *c, SyncClock *dst) const;
 };
 
 }  // namespace __tsan
Index: lib/tsan/rtl/tsan_clock.cc
===================================================================
--- lib/tsan/rtl/tsan_clock.cc
+++ lib/tsan/rtl/tsan_clock.cc
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "tsan_clock.h"
 #include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
 
 // SyncClock and ThreadClock implement vector clocks for sync variables
 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
@@ -102,13 +103,13 @@
   clk_[tid_].reused = reused_;
 }
 
-void ThreadClock::acquire(const SyncClock *src) {
+void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
   DCHECK(nclk_ <= kMaxTid);
-  DCHECK(src->clk_.Size() <= kMaxTid);
+  DCHECK(src->size_ <= kMaxTid);
   CPP_STAT_INC(StatClockAcquire);
 
   // Check if it's empty -> no need to do anything.
-  const uptr nclk = src->clk_.Size();
+  const uptr nclk = src->size_;
   if (nclk == 0) {
     CPP_STAT_INC(StatClockAcquireEmpty);
     return;
@@ -118,12 +119,12 @@
   bool acquired = false;
   if (nclk > tid_) {
     CPP_STAT_INC(StatClockAcquireLarge);
-    if (src->clk_[tid_].reused == reused_) {
+    if (src->elem(tid_).reused == reused_) {
       CPP_STAT_INC(StatClockAcquireRepeat);
       for (unsigned i = 0; i < kDirtyTids; i++) {
         unsigned tid = src->dirty_tids_[i];
         if (tid != kInvalidTid) {
-          u64 epoch = src->clk_[tid].epoch;
+          u64 epoch = src->elem(tid).epoch;
           if (clk_[tid].epoch < epoch) {
             clk_[tid].epoch = epoch;
             acquired = true;
@@ -142,7 +143,7 @@
   CPP_STAT_INC(StatClockAcquireFull);
   nclk_ = max(nclk_, nclk);
   for (uptr i = 0; i < nclk; i++) {
-    u64 epoch = src->clk_[i].epoch;
+    u64 epoch = src->elem(i).epoch;
     if (clk_[i].epoch < epoch) {
       clk_[i].epoch = epoch;
       acquired = true;
@@ -151,7 +152,7 @@
 
   // Remember that this thread has acquired this clock.
   if (nclk > tid_)
-    src->clk_[tid_].reused = reused_;
+    src->elem(tid_).reused = reused_;
 
   if (acquired) {
     CPP_STAT_INC(StatClockAcquiredSomething);
@@ -159,28 +160,26 @@
   }
 }
 
-void ThreadClock::release(SyncClock *dst) const {
+void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
   DCHECK_LE(nclk_, kMaxTid);
-  DCHECK_LE(dst->clk_.Size(), kMaxTid);
+  DCHECK_LE(dst->size_, kMaxTid);
 
-  if (dst->clk_.Size() == 0) {
+  if (dst->size_ == 0) {
     // ReleaseStore will correctly set release_store_tid_,
     // which can be important for future operations.
-    ReleaseStore(dst);
+    ReleaseStore(c, dst);
     return;
   }
 
   CPP_STAT_INC(StatClockRelease);
 
   // Check if we need to resize dst.
-  if (dst->clk_.Size() < nclk_) {
-    CPP_STAT_INC(StatClockReleaseResize);
-    dst->clk_.Resize(nclk_);
-  }
+  if (dst->size_ < nclk_)
+    Resize(c, dst);
 
   // Check if we had not acquired anything from other threads
   // since the last release on dst. If so, we need to update
-  // only dst->clk_[tid_].
-  if (dst->clk_[tid_].epoch > last_acquire_) {
+  // only dst->elem(tid_).
+  if (dst->elem(tid_).epoch > last_acquire_) {
     UpdateCurrentThread(dst);
     if (dst->release_store_tid_ != tid_ ||
         dst->release_store_reused_ != reused_)
@@ -196,14 +195,15 @@
   CPP_STAT_INC(StatClockReleaseAcquired);
   // Update dst->clk_.
   for (uptr i = 0; i < nclk_; i++) {
-    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
-    dst->clk_[i].reused = 0;
+    ClockElem &ce = dst->elem(i);
+    ce.epoch = max(ce.epoch, clk_[i].epoch);
+    ce.reused = 0;
   }
   // Clear 'acquired' flag in the remaining elements.
-  if (nclk_ < dst->clk_.Size())
+  if (nclk_ < dst->size_)
     CPP_STAT_INC(StatClockReleaseClearTail);
-  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
-    dst->clk_[i].reused = 0;
+  for (uptr i = nclk_; i < dst->size_; i++)
+    dst->elem(i).reused = 0;
   for (unsigned i = 0; i < kDirtyTids; i++)
     dst->dirty_tids_[i] = kInvalidTid;
   dst->release_store_tid_ = kInvalidTid;
@@ -211,23 +211,21 @@
   // If we've acquired dst, remember this fact,
   // so that we don't need to acquire it on next acquire.
   if (acquired)
-    dst->clk_[tid_].reused = reused_;
+    dst->elem(tid_).reused = reused_;
 }
 
-void ThreadClock::ReleaseStore(SyncClock *dst) const {
+void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
   DCHECK(nclk_ <= kMaxTid);
-  DCHECK(dst->clk_.Size() <= kMaxTid);
+  DCHECK(dst->size_ <= kMaxTid);
   CPP_STAT_INC(StatClockStore);
 
   // Check if we need to resize dst.
-  if (dst->clk_.Size() < nclk_) {
-    CPP_STAT_INC(StatClockStoreResize);
-    dst->clk_.Resize(nclk_);
-  }
+  if (dst->size_ < nclk_)
+    Resize(c, dst);
 
   if (dst->release_store_tid_ == tid_ &&
       dst->release_store_reused_ == reused_ &&
-      dst->clk_[tid_].epoch > last_acquire_) {
+      dst->elem(tid_).epoch > last_acquire_) {
     CPP_STAT_INC(StatClockStoreFast);
     UpdateCurrentThread(dst);
     return;
@@ -236,13 +234,17 @@
   // O(N) release-store.
   CPP_STAT_INC(StatClockStoreFull);
   for (uptr i = 0; i < nclk_; i++) {
-    dst->clk_[i].epoch = clk_[i].epoch;
-    dst->clk_[i].reused = 0;
+    ClockElem &ce = dst->elem(i);
+    ce.epoch = clk_[i].epoch;
+    ce.reused = 0;
   }
   // Clear the tail of dst->clk_.
-  if (nclk_ < dst->clk_.Size()) {
-    internal_memset(&dst->clk_[nclk_], 0,
-        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
+  if (nclk_ < dst->size_) {
+    for (uptr i = nclk_; i < dst->size_; i++) {
+      ClockElem &ce = dst->elem(i);
+      ce.epoch = 0;
+      ce.reused = 0;
+    }
     CPP_STAT_INC(StatClockStoreTail);
   }
   for (unsigned i = 0; i < kDirtyTids; i++)
@@ -250,19 +252,19 @@
   dst->release_store_tid_ = tid_;
   dst->release_store_reused_ = reused_;
   // Rememeber that we don't need to acquire it in future.
-  dst->clk_[tid_].reused = reused_;
+  dst->elem(tid_).reused = reused_;
 }
 
-void ThreadClock::acq_rel(SyncClock *dst) {
+void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
   CPP_STAT_INC(StatClockAcquireRelease);
-  acquire(dst);
-  ReleaseStore(dst);
+  acquire(c, dst);
+  ReleaseStore(c, dst);
 }
 
 // Updates only single element related to the current thread in dst->clk_.
 void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
   // Update the threads time, but preserve 'acquired' flag.
-  dst->clk_[tid_].epoch = clk_[tid_].epoch;
+  dst->elem(tid_).epoch = clk_[tid_].epoch;
 
   for (unsigned i = 0; i < kDirtyTids; i++) {
     if (dst->dirty_tids_[i] == tid_) {
@@ -277,27 +279,73 @@
   }
   // Reset all 'acquired' flags, O(N).
   CPP_STAT_INC(StatClockReleaseSlow);
-  for (uptr i = 0; i < dst->clk_.Size(); i++) {
-    dst->clk_[i].reused = 0;
-  }
+  for (uptr i = 0; i < dst->size_; i++)
+    dst->elem(i).reused = 0;
   for (unsigned i = 0; i < kDirtyTids; i++)
     dst->dirty_tids_[i] = kInvalidTid;
 }
 
 // Checks whether the current threads has already acquired src.
 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
-  if (src->clk_[tid_].reused != reused_)
+  if (src->elem(tid_).reused != reused_)
     return false;
   for (unsigned i = 0; i < kDirtyTids; i++) {
     unsigned tid = src->dirty_tids_[i];
     if (tid != kInvalidTid) {
-      if (clk_[tid].epoch < src->clk_[tid].epoch)
+      if (clk_[tid].epoch < src->elem(tid).epoch)
         return false;
     }
   }
   return true;
 }
 
+void ThreadClock::Resize(ClockCache *c, SyncClock *dst) const {
+  CPP_STAT_INC(StatClockReleaseResize);
+  if (RoundUpTo(nclk_, ClockBlock::kClockCount) <=
+      RoundUpTo(dst->size_, ClockBlock::kClockCount)) {
+    // Growing within the same block.
+    // Memory is already allocated, just increase the size.
+    dst->size_ = nclk_;
+    return;
+  }
+  if (nclk_ <= ClockBlock::kClockCount) {
+    // Grow from 0 to one-level table.
+    CHECK_EQ(dst->size_, 0);
+    CHECK_EQ(dst->tab_, 0);
+    CHECK_EQ(dst->tab_idx_, 0);
+    dst->size_ = nclk_;
+    dst->tab_idx_ = ctx->clock_alloc.Alloc(c);
+    dst->tab_ = ctx->clock_alloc.Map(dst->tab_idx_);
+    internal_memset(dst->tab_, 0, sizeof(*dst->tab_));
+    return;
+  }
+  // Growing two-level table.
+  if (dst->size_ == 0) {
+    // Allocate first level table.
+    dst->tab_idx_ = ctx->clock_alloc.Alloc(c);
+    dst->tab_ = ctx->clock_alloc.Map(dst->tab_idx_);
+    internal_memset(dst->tab_, 0, sizeof(*dst->tab_));
+  } else if (dst->size_ <= ClockBlock::kClockCount) {
+    // Transform one-level table to two-level table.
+    u32 old = dst->tab_idx_;
+    dst->tab_idx_ = ctx->clock_alloc.Alloc(c);
+    dst->tab_ = ctx->clock_alloc.Map(dst->tab_idx_);
+    internal_memset(dst->tab_, 0, sizeof(*dst->tab_));
+    dst->tab_->table[0] = old;
+  }
+  // At this point we have first level table allocated.
+  // Add second level tables as necessary.
+  for (uptr i = RoundUpTo(dst->size_, ClockBlock::kClockCount);
+      i < nclk_; i += ClockBlock::kClockCount) {
+    u32 idx = ctx->clock_alloc.Alloc(c);
+    ClockBlock *cb = ctx->clock_alloc.Map(idx);
+    internal_memset(cb, 0, sizeof(*cb));
+    CHECK_EQ(dst->tab_->table[i/ClockBlock::kClockCount], 0);
+    dst->tab_->table[i/ClockBlock::kClockCount] = idx;
+  }
+  dst->size_ = nclk_;
+}
+
 // Sets a single element in the vector clock.
 // This function is called only from weird places like AcquireGlobal.
 void ThreadClock::set(unsigned tid, u64 v) {
@@ -320,34 +368,59 @@
       tid_, reused_, last_acquire_);
 }
 
-SyncClock::SyncClock()
-    : clk_(MBlockClock) {
+SyncClock::SyncClock() {
+  tab_ = 0;
+  tab_idx_ = 0;
+  size_ = 0;
   release_store_tid_ = kInvalidTid;
   release_store_reused_ = 0;
   for (uptr i = 0; i < kDirtyTids; i++)
     dirty_tids_[i] = kInvalidTid;
 }
 
-void SyncClock::Reset() {
-  clk_.Reset();
-  Zero();
-}
-
-void SyncClock::Zero() {
-  clk_.Resize(0);
+SyncClock::~SyncClock() {
+  CHECK_EQ(size_, 0);
+  CHECK_EQ(tab_, 0);
+  CHECK_EQ(tab_idx_, 0);
+}
+
+void SyncClock::Reset(ClockCache *c) {
+  if (size_ == 0) {
+    // nothing
+  } else if (size_ <= ClockBlock::kClockCount) {
+    // One-level table.
+    ctx->clock_alloc.Free(c, tab_idx_);
+  } else {
+    // Two-level table.
+    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
+      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
+    ctx->clock_alloc.Free(c, tab_idx_);
+  }
+  tab_ = 0;
+  tab_idx_ = 0;
+  size_ = 0;
   release_store_tid_ = kInvalidTid;
   release_store_reused_ = 0;
   for (uptr i = 0; i < kDirtyTids; i++)
     dirty_tids_[i] = kInvalidTid;
 }
 
+ClockElem &SyncClock::elem(unsigned tid) const {
+  DCHECK_LT(tid, size_);
+  if (size_ <= ClockBlock::kClockCount)
+    return tab_->clock[tid];
+  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
+  ClockBlock *cb = ctx->clock_alloc.Map(idx);
+  return cb->clock[tid % ClockBlock::kClockCount];
+}
+
 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
   printf("clock=[");
-  for (uptr i = 0; i < clk_.Size(); i++)
-    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+  for (uptr i = 0; i < size_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
   printf("] reused=[");
-  for (uptr i = 0; i < clk_.Size(); i++)
-    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+  for (uptr i = 0; i < size_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
   printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
       release_store_tid_, release_store_reused_,
       dirty_tids_[0], dirty_tids_[1]);
Index: lib/tsan/rtl/tsan_flags.cc
===================================================================
--- lib/tsan/rtl/tsan_flags.cc
+++ lib/tsan/rtl/tsan_flags.cc
@@ -107,7 +107,7 @@
   ParseCommonFlagsFromString(f, env);
 
   // Copy back to common flags.
-  *common_flags() = *f;
+  internal_memcpy(common_flags(), f, sizeof(*common_flags()));
 
   // Sanity check.
   if (!f->report_bugs) {
Index: lib/tsan/rtl/tsan_rtl.h
===================================================================
--- lib/tsan/rtl/tsan_rtl.h
+++ lib/tsan/rtl/tsan_rtl.h
@@ -374,6 +374,7 @@
 
   DenseSlabAllocCache block_cache;
   DenseSlabAllocCache sync_cache;
+  DenseSlabAllocCache clock_cache;
 
 #ifndef TSAN_GO
   u32 last_sleep_stack_id;
@@ -418,6 +419,7 @@
   void OnStarted(void *arg);
   void OnCreated(void *arg);
   void OnReset();
+  void OnDetached(void *arg);
 };
 
 struct RacyStacks {
@@ -466,6 +468,8 @@
   InternalMmapVector<FiredSuppression> fired_suppressions;
   DDetector *dd;
 
+  ClockAlloc clock_alloc;
+
   Flags flags;
 
   u64 stat[StatCnt];
Index: lib/tsan/rtl/tsan_rtl_mutex.cc
===================================================================
--- lib/tsan/rtl/tsan_rtl_mutex.cc
+++ lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -118,7 +118,7 @@
   u64 mid = s->GetId();
   u32 last_lock = s->last_lock;
   if (!unlock_locked)
-    s->Reset();  // must not reset it before the report is printed
+    s->Reset(thr);  // must not reset it before the report is printed
   s->mtx.Unlock();
   if (unlock_locked) {
     ThreadRegistryLock l(ctx->thread_registry);
@@ -136,7 +136,7 @@
   if (unlock_locked) {
     SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
     if (s != 0) {
-      s->Reset();
+      s->Reset(thr);
       s->mtx.Unlock();
     }
   }
@@ -429,7 +429,7 @@
   if (thr->ignore_sync)
     return;
   thr->clock.set(thr->fast_state.epoch());
-  thr->clock.acquire(c);
+  thr->clock.acquire(&thr->clock_cache, c);
   StatInc(thr, StatSyncAcquire);
 }
 
@@ -438,7 +438,7 @@
     return;
   thr->clock.set(thr->fast_state.epoch());
   thr->fast_synch_epoch = thr->fast_state.epoch();
-  thr->clock.release(c);
+  thr->clock.release(&thr->clock_cache, c);
   StatInc(thr, StatSyncRelease);
 }
 
@@ -447,7 +447,7 @@
     return;
   thr->clock.set(thr->fast_state.epoch());
   thr->fast_synch_epoch = thr->fast_state.epoch();
-  thr->clock.ReleaseStore(c);
+  thr->clock.ReleaseStore(&thr->clock_cache, c);
   StatInc(thr, StatSyncRelease);
 }
 
@@ -456,7 +456,7 @@
     return;
   thr->clock.set(thr->fast_state.epoch());
   thr->fast_synch_epoch = thr->fast_state.epoch();
-  thr->clock.acq_rel(c);
+  thr->clock.acq_rel(&thr->clock_cache, c);
   StatInc(thr, StatSyncAcquire);
   StatInc(thr, StatSyncRelease);
 }
Index: lib/tsan/rtl/tsan_rtl_thread.cc
===================================================================
--- lib/tsan/rtl/tsan_rtl_thread.cc
+++ lib/tsan/rtl/tsan_rtl_thread.cc
@@ -36,13 +36,13 @@
 #endif
 
 void ThreadContext::OnDead() {
-  sync.Reset();
+  CHECK_EQ(sync.size(), 0);
 }
 
 void ThreadContext::OnJoined(void *arg) {
   ThreadState *caller_thr = static_cast<ThreadState *>(arg);
   AcquireImpl(caller_thr, 0, &sync);
-  sync.Reset();
+  sync.Reset(&caller_thr->clock_cache);
 }
 
 struct OnCreatedArgs {
@@ -65,11 +65,16 @@
 }
 
 void ThreadContext::OnReset() {
-  sync.Reset();
+  CHECK_EQ(sync.size(), 0);
   FlushUnneededShadowMemory(GetThreadTrace(tid), TraceSize() * sizeof(Event));
   //!!! FlushUnneededShadowMemory(GetThreadTraceHeader(tid), sizeof(Trace));
 }
 
+void ThreadContext::OnDetached(void *arg) {
+  ThreadState *thr1 = static_cast<ThreadState *>(arg);
+  sync.Reset(&thr1->clock_cache);
+}
+
 struct OnStartedArgs {
   ThreadState *thr;
   uptr stk_addr;
@@ -113,7 +118,7 @@
   Trace *thr_trace = ThreadTrace(thr->tid);
   thr_trace->headers[trace].epoch0 = epoch0;
   StatInc(thr, StatSyncAcquire);
-  sync.Reset();
+  sync.Reset(&thr->clock_cache);
   DPrintf("#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
           "tls_addr=%zx tls_size=%zx\n",
           tid, (uptr)epoch0, args->stk_addr, args->stk_size,
@@ -134,6 +139,7 @@
     ctx->dd->DestroyPhysicalThread(thr->dd_pt);
     ctx->dd->DestroyLogicalThread(thr->dd_lt);
   }
+  ctx->clock_alloc.FlushCache(&thr->clock_cache);
   ctx->metamap.OnThreadIdle(thr);
 #ifndef TSAN_GO
   AllocatorThreadFinish(thr);
@@ -307,7 +313,7 @@
 void ThreadDetach(ThreadState *thr, uptr pc, int tid) {
   CHECK_GT(tid, 0);
   CHECK_LT(tid, kMaxTid);
-  ctx->thread_registry->DetachThread(tid);
+  ctx->thread_registry->DetachThread(tid, thr);
 }
 
 void ThreadSetName(ThreadState *thr, const char *name) {
Index: lib/tsan/rtl/tsan_sync.h
===================================================================
--- lib/tsan/rtl/tsan_sync.h
+++ lib/tsan/rtl/tsan_sync.h
@@ -47,7 +47,7 @@
   SyncClock clock;
 
   void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid);
-  void Reset();
+  void Reset(ThreadState *thr);
 
   u64 GetId() const {
     // 47 lsb is addr, then 14 bits is low part of uid, then 3 zero bits.
Index: lib/tsan/rtl/tsan_sync.cc
===================================================================
--- lib/tsan/rtl/tsan_sync.cc
+++ lib/tsan/rtl/tsan_sync.cc
@@ -21,7 +21,7 @@
 
 SyncVar::SyncVar()
     : mtx(MutexTypeSyncVar, StatMtxSyncVar) {
-  Reset();
+  Reset(0);
 }
 
 void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid) {
@@ -36,7 +36,7 @@
   DDMutexInit(thr, pc, this);
 }
 
-void SyncVar::Reset() {
+void SyncVar::Reset(ThreadState *thr) {
   uid = 0;
   creation_stack_id = 0;
   owner_tid = kInvalidTid;
@@ -47,8 +47,13 @@
   is_broken = 0;
   is_linker_init = 0;
 
-  clock.Zero();
-  read_clock.Reset();
+  if (thr == 0) {
+    CHECK_EQ(clock.size(), 0);
+    CHECK_EQ(read_clock.size(), 0);
+  } else {
+    clock.Reset(&thr->clock_cache);
+    read_clock.Reset(&thr->clock_cache);
+  }
 }
 
 MetaMap::MetaMap() {
@@ -93,7 +98,7 @@
       DCHECK(idx & kFlagSync);
       SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
       u32 next = s->next;
-      s->Reset();
+      s->Reset(thr);
       sync_alloc_.Free(&thr->sync_cache, idx & ~kFlagMask);
       idx = next;
     } else {
@@ -143,7 +148,7 @@
     SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
     if (s->addr == addr) {
      if (myidx != 0) {
-        mys->Reset();
+        mys->Reset(thr);
        sync_alloc_.Free(&thr->sync_cache, myidx);
      }
      if (write_lock)
Index: lib/tsan/tests/unit/tsan_clock_test.cc
===================================================================
--- lib/tsan/tests/unit/tsan_clock_test.cc
+++ lib/tsan/tests/unit/tsan_clock_test.cc
@@ -17,6 +17,8 @@
 
 namespace __tsan {
 
+ClockCache cache;
+
 TEST(Clock, VectorBasic) {
   ThreadClock clk(0);
   ASSERT_EQ(clk.size(), 1U);
@@ -38,30 +40,32 @@
   SyncClock chunked;
   ASSERT_EQ(vector.size(), 1U);
   ASSERT_EQ(chunked.size(), 0U);
-  vector.acquire(&chunked);
+  vector.acquire(&cache, &chunked);
   ASSERT_EQ(vector.size(), 1U);
   ASSERT_EQ(chunked.size(), 0U);
-  vector.release(&chunked);
+  vector.release(&cache, &chunked);
   ASSERT_EQ(vector.size(), 1U);
   ASSERT_EQ(chunked.size(), 1U);
-  vector.acq_rel(&chunked);
+  vector.acq_rel(&cache, &chunked);
   ASSERT_EQ(vector.size(), 1U);
   ASSERT_EQ(chunked.size(), 1U);
+  chunked.Reset(&cache);
 }
 
 TEST(Clock, AcquireRelease) {
   ThreadClock vector1(100);
   vector1.tick();
   SyncClock chunked;
-  vector1.release(&chunked);
+  vector1.release(&cache, &chunked);
   ASSERT_EQ(chunked.size(), 101U);
   ThreadClock vector2(0);
-  vector2.acquire(&chunked);
+  vector2.acquire(&cache, &chunked);
   ASSERT_EQ(vector2.size(), 101U);
   ASSERT_EQ(vector2.get(0), 0U);
   ASSERT_EQ(vector2.get(1), 0U);
   ASSERT_EQ(vector2.get(99), 0U);
   ASSERT_EQ(vector2.get(100), 1U);
+  chunked.Reset(&cache);
 }
 
 TEST(Clock, RepeatedAcquire) {
@@ -71,10 +75,12 @@
   thr2.tick();
 
   SyncClock sync;
-  thr1.ReleaseStore(&sync);
+  thr1.ReleaseStore(&cache, &sync);
+
+  thr2.acquire(&cache, &sync);
+  thr2.acquire(&cache, &sync);
 
-  thr2.acquire(&sync);
-  thr2.acquire(&sync);
+  sync.Reset(&cache);
 }
 
 TEST(Clock, ManyThreads) {
@@ -83,9 +89,9 @@
     ThreadClock vector(0);
     vector.tick();
     vector.set(i, 1);
-    vector.release(&chunked);
+    vector.release(&cache, &chunked);
     ASSERT_EQ(i + 1, chunked.size());
-    vector.acquire(&chunked);
+    vector.acquire(&cache, &chunked);
     ASSERT_EQ(i + 1, vector.size());
   }
 
@@ -93,10 +99,12 @@
     ASSERT_EQ(1U, chunked.get(i));
 
   ThreadClock vector(1);
-  vector.acquire(&chunked);
+  vector.acquire(&cache, &chunked);
   ASSERT_EQ(100U, vector.size());
   for (unsigned i = 0; i < 100; i++)
     ASSERT_EQ(1U, vector.get(i));
+
+  chunked.Reset(&cache);
 }
 
 TEST(Clock, DifferentSizes) {
@@ -107,33 +115,102 @@
     vector2.tick();
     {
      SyncClock chunked;
-      vector1.release(&chunked);
+      vector1.release(&cache, &chunked);
      ASSERT_EQ(chunked.size(), 11U);
-      vector2.release(&chunked);
+      vector2.release(&cache, &chunked);
       ASSERT_EQ(chunked.size(), 21U);
+      chunked.Reset(&cache);
     }
     {
       SyncClock chunked;
-      vector2.release(&chunked);
+      vector2.release(&cache, &chunked);
       ASSERT_EQ(chunked.size(), 21U);
-      vector1.release(&chunked);
+      vector1.release(&cache, &chunked);
       ASSERT_EQ(chunked.size(), 21U);
+      chunked.Reset(&cache);
     }
     {
       SyncClock chunked;
-      vector1.release(&chunked);
-      vector2.acquire(&chunked);
+      vector1.release(&cache, &chunked);
+      vector2.acquire(&cache, &chunked);
       ASSERT_EQ(vector2.size(), 21U);
+      chunked.Reset(&cache);
     }
     {
       SyncClock chunked;
-      vector2.release(&chunked);
-      vector1.acquire(&chunked);
+      vector2.release(&cache, &chunked);
+      vector1.acquire(&cache, &chunked);
       ASSERT_EQ(vector1.size(), 21U);
+      chunked.Reset(&cache);
     }
   }
 }
 
+TEST(Clock, Growth) {
+  {
+    ThreadClock vector(10);
+    vector.tick();
+    vector.set(5, 42);
+    SyncClock sync;
+    vector.release(&cache, &sync);
+    ASSERT_EQ(sync.size(), 11U);
+    ASSERT_EQ(sync.get(0), 0ULL);
+    ASSERT_EQ(sync.get(1), 0ULL);
+    ASSERT_EQ(sync.get(5), 42ULL);
+    ASSERT_EQ(sync.get(9), 0ULL);
+    ASSERT_EQ(sync.get(10), 1ULL);
+    sync.Reset(&cache);
+  }
+  {
+    ThreadClock vector1(10);
+    vector1.tick();
+    ThreadClock vector2(20);
+    vector2.tick();
+    SyncClock sync;
+    vector1.release(&cache, &sync);
+    vector2.release(&cache, &sync);
+    ASSERT_EQ(sync.size(), 21U);
+    ASSERT_EQ(sync.get(0), 0ULL);
+    ASSERT_EQ(sync.get(10), 1ULL);
+    ASSERT_EQ(sync.get(19), 0ULL);
+    ASSERT_EQ(sync.get(20), 1ULL);
+    sync.Reset(&cache);
+  }
+  {
+    ThreadClock vector(100);
+    vector.tick();
+    vector.set(5, 42);
+    vector.set(90, 84);
+    SyncClock sync;
+    vector.release(&cache, &sync);
+    ASSERT_EQ(sync.size(), 101U);
+    ASSERT_EQ(sync.get(0), 0ULL);
+    ASSERT_EQ(sync.get(1), 0ULL);
+    ASSERT_EQ(sync.get(5), 42ULL);
+    ASSERT_EQ(sync.get(60), 0ULL);
+    ASSERT_EQ(sync.get(70), 0ULL);
+    ASSERT_EQ(sync.get(90), 84ULL);
+    ASSERT_EQ(sync.get(99), 0ULL);
+    ASSERT_EQ(sync.get(100), 1ULL);
+    sync.Reset(&cache);
+  }
+  {
+    ThreadClock vector1(10);
+    vector1.tick();
+    ThreadClock vector2(100);
+    vector2.tick();
+    SyncClock sync;
+    vector1.release(&cache, &sync);
+    vector2.release(&cache, &sync);
+    ASSERT_EQ(sync.size(), 101U);
+    ASSERT_EQ(sync.get(0), 0ULL);
+    ASSERT_EQ(sync.get(10), 1ULL);
+    ASSERT_EQ(sync.get(99), 0ULL);
+    ASSERT_EQ(sync.get(100), 1ULL);
+    sync.Reset(&cache);
+  }
+}
+
 const int kThreads = 4;
 const int kClocks = 4;
 
@@ -257,31 +334,31 @@
       if (printing)
         printf("acquire thr%d <- clk%d\n", tid, cid);
       thr0[tid]->acquire(sync0[cid]);
-      thr1[tid]->acquire(sync1[cid]);
+      thr1[tid]->acquire(&cache, sync1[cid]);
       break;
    case 1:
      if (printing)
        printf("release thr%d -> clk%d\n", tid, cid);
      thr0[tid]->release(sync0[cid]);
-      thr1[tid]->release(sync1[cid]);
+      thr1[tid]->release(&cache, sync1[cid]);
      break;
    case 2:
      if (printing)
        printf("acq_rel thr%d <> clk%d\n", tid, cid);
      thr0[tid]->acq_rel(sync0[cid]);
-      thr1[tid]->acq_rel(sync1[cid]);
+      thr1[tid]->acq_rel(&cache, sync1[cid]);
      break;
    case 3:
      if (printing)
        printf("rel_str thr%d >> clk%d\n", tid, cid);
      thr0[tid]->ReleaseStore(sync0[cid]);
-      thr1[tid]->ReleaseStore(sync1[cid]);
+      thr1[tid]->ReleaseStore(&cache, sync1[cid]);
      break;
    case 4:
      if (printing)
        printf("reset clk%d\n", cid);
      sync0[cid]->Reset();
-      sync1[cid]->Reset();
+      sync1[cid]->Reset(&cache);
      break;
    case 5:
      if (printing)
@@ -331,6 +408,10 @@
       return false;
     }
   }
+
+  for (unsigned i = 0; i < kClocks; i++) {
+    sync1[i]->Reset(&cache);
+  }
   return true;
 }
 
Index: lib/tsan/tests/unit/tsan_sync_test.cc
===================================================================
--- lib/tsan/tests/unit/tsan_sync_test.cc
+++ lib/tsan/tests/unit/tsan_sync_test.cc
@@ -114,7 +114,7 @@
   u64 block[1] = {};  // fake malloc block
   m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
   SyncVar *s = m->GetOrCreateAndLock(thr, 0, (uptr)&block[0], true);
-  s->Reset();
+  s->Reset(thr);
   s->mtx.Unlock();
   uptr sz = m->FreeBlock(thr, 0, (uptr)&block[0]);
   EXPECT_EQ(sz, 1 * sizeof(u64));
Index: test/tsan/thread_detach.c
===================================================================
--- test/tsan/thread_detach.c
+++ test/tsan/thread_detach.c
@@ -0,0 +1,20 @@
+// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+void *Thread(void *x) {
+  return 0;
+}
+
+int main() {
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  sleep(1);
+  pthread_detach(t);
+  printf("PASS\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: thread leak
+// CHECK: PASS
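
Illustrative sketch (not part of the patch): the core of the change is replacing SyncClock's Vector<ClockElem> with the tab_/tab_idx_/size_ triple, where elem() resolves a tid either directly (one-level layout: up to 64 ClockElem's in a single 512-byte ClockBlock) or through a first-level table of 128 u32 block indices (two-level layout). The standalone program below shows just that indexing scheme; Slab, Block, and elem() are invented stand-ins (plain new/delete plus a vector of pointers instead of DenseSlabAlloc), so only the block geometry and the one- vs two-level lookup mirror the patch.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

struct Elem { uint64_t epoch; };

// A 512-byte block, as in ClockBlock: either a table of 32-bit block
// indices (first level) or 64 clock elements (leaf level).
struct Block {
  static constexpr size_t kSize = 512;
  static constexpr size_t kTableSize = kSize / sizeof(uint32_t);  // 128
  static constexpr size_t kClockCount = kSize / sizeof(Elem);     // 64
  union {
    uint32_t table[kTableSize];
    Elem clock[kClockCount];
  };
};

// Stand-in for DenseSlabAlloc: blocks are addressed by u32 index, and
// index 0 means "no block", matching the CHECK_EQ(dst->tab_idx_, 0)
// pattern in the patch.
struct Slab {
  std::vector<Block *> blocks{nullptr};  // slot 0 reserved
  ~Slab() { for (Block *b : blocks) delete b; }
  uint32_t Alloc() {
    Block *b = new Block;
    std::memset(b, 0, sizeof(*b));
    blocks.push_back(b);
    return (uint32_t)(blocks.size() - 1);
  }
  Block *Map(uint32_t idx) { return blocks[idx]; }
};

// The lookup SyncClock::elem() performs: one load for small clocks,
// two dependent loads for large ones.
Elem &elem(Slab &slab, Block *tab, size_t size, unsigned tid) {
  assert(tid < size);
  if (size <= Block::kClockCount)
    return tab->clock[tid];  // one-level layout
  uint32_t idx = tab->table[tid / Block::kClockCount];
  return slab.Map(idx)->clock[tid % Block::kClockCount];
}

int main() {
  Slab slab;
  // One-level clock: up to 64 elements live directly in the block.
  Block *tab1 = slab.Map(slab.Alloc());
  elem(slab, tab1, Block::kClockCount, 63).epoch = 42;
  assert(elem(slab, tab1, Block::kClockCount, 63).epoch == 42);

  // Two-level clock: the first block holds indices of leaf blocks,
  // the way ThreadClock::Resize() populates tab_->table[].
  Block *tab2 = slab.Map(slab.Alloc());
  size_t size = 3 * Block::kClockCount;  // 192 elements -> 3 leaves
  for (size_t i = 0; i < size; i += Block::kClockCount)
    tab2->table[i / Block::kClockCount] = slab.Alloc();
  elem(slab, tab2, size, 130).epoch = 7;
  assert(elem(slab, tab2, size, 130).epoch == 7);
  return 0;
}

This is also why Resize() treats growth within the same block as a pure size_ bump: the 512-byte block is already allocated, so clocks stay in the cheap one-level form until they outgrow ClockBlock::kClockCount.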
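The other half of the design is the allocation discipline: SyncClock no longer frees memory in its destructor; Reset(c) must return every block to a per-thread cache (the new ~SyncClock only CHECKs that this happened), and ThreadFinish flushes that cache back to the shared allocator via ctx->clock_alloc.FlushCache(&thr->clock_cache). Below is a minimal sketch of that cache/flush pattern under stated assumptions: CentralAlloc and ThreadCache are invented stand-ins for DenseSlabAlloc/DenseSlabAllocCache, and the batch and spill sizes are arbitrary, not taken from the patch.

#include <cstdint>
#include <mutex>
#include <vector>

// Shared allocator: all cross-thread traffic goes through one mutex,
// but only in batches.
struct CentralAlloc {
  std::mutex mtx;
  std::vector<uint32_t> free_list;
  uint32_t next = 1;  // index 0 reserved to mean "no block"

  void Refill(std::vector<uint32_t> *cache, size_t batch) {
    std::lock_guard<std::mutex> l(mtx);
    while (cache->size() < batch) {
      if (!free_list.empty()) {
        cache->push_back(free_list.back());
        free_list.pop_back();
      } else {
        cache->push_back(next++);
      }
    }
  }

  void Drain(std::vector<uint32_t> *cache) {
    std::lock_guard<std::mutex> l(mtx);
    free_list.insert(free_list.end(), cache->begin(), cache->end());
    cache->clear();
  }
};

// Per-thread cache: Alloc/Free touch only thread-local state in the
// common case, like DenseSlabAllocCache in the patch.
struct ThreadCache {
  std::vector<uint32_t> idx;
};

uint32_t Alloc(CentralAlloc *a, ThreadCache *c) {
  if (c->idx.empty())
    a->Refill(&c->idx, 16);  // one locked refill per 16 allocations
  uint32_t i = c->idx.back();
  c->idx.pop_back();
  return i;
}

void Free(CentralAlloc *a, ThreadCache *c, uint32_t i) {
  c->idx.push_back(i);
  if (c->idx.size() > 64)  // spill threshold, arbitrary in this sketch
    a->Drain(&c->idx);
}

int main() {
  CentralAlloc central;
  ThreadCache cache;  // one per thread in the real runtime
  uint32_t b = Alloc(&central, &cache);
  Free(&central, &cache, b);
  // Mirrors ctx->clock_alloc.FlushCache(&thr->clock_cache) at thread exit:
  central.Drain(&cache.idx);
  return 0;
}

This is the same reason SyncVar::Reset() now takes a ThreadState: freeing a clock requires some thread's cache to free into, and the detach path (OnDetached) threads the detaching thread's state through the new DetachThread(tid, arg) overload so the detached thread's sync clock can be released immediately, which is what the new thread_detach.c test checks for.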