diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h @@ -104,6 +104,8 @@ return threads_.empty() ? nullptr : threads_[tid]; } + u32 NumThreadsLocked() const { return threads_.size(); } + u32 CreateThread(uptr user_id, bool detached, u32 parent_tid, void *arg); typedef void (*ThreadCallback)(ThreadContextBase *tctx, void *arg); diff --git a/compiler-rt/lib/tsan/CMakeLists.txt b/compiler-rt/lib/tsan/CMakeLists.txt --- a/compiler-rt/lib/tsan/CMakeLists.txt +++ b/compiler-rt/lib/tsan/CMakeLists.txt @@ -119,7 +119,6 @@ rtl/tsan_symbolize.h rtl/tsan_sync.h rtl/tsan_trace.h - rtl/tsan_update_shadow_word.inc rtl/tsan_vector_clock.h ) diff --git a/compiler-rt/lib/tsan/check_analyze.sh b/compiler-rt/lib/tsan/check_analyze.sh --- a/compiler-rt/lib/tsan/check_analyze.sh +++ b/compiler-rt/lib/tsan/check_analyze.sh @@ -34,21 +34,27 @@ fi } +# All hot functions must contain no PUSH/POP +# and no CALLs (everything is tail-called). for f in write1 write2 write4 write8; do check $f rsp 1 - check $f push 2 + check $f push 0 + check $f pop 0 + check $f call 0 done for f in read1 read2 read4 read8; do check $f rsp 1 - check $f push 3 + check $f push 0 + check $f pop 0 + check $f call 0 done for f in func_entry func_exit; do check $f rsp 0 check $f push 0 check $f pop 0 - check $f call 1 # TraceSwitch() + check $f call 0 done echo LGTM diff --git a/compiler-rt/lib/tsan/go/build.bat b/compiler-rt/lib/tsan/go/build.bat --- a/compiler-rt/lib/tsan/go/build.bat +++ b/compiler-rt/lib/tsan/go/build.bat @@ -14,6 +14,7 @@ ..\rtl\tsan_suppressions.cpp ^ ..\rtl\tsan_sync.cpp ^ ..\rtl\tsan_stack_trace.cpp ^ + ..\rtl\tsan_vector_clock.cpp ^ ..\..\sanitizer_common\sanitizer_allocator.cpp ^ ..\..\sanitizer_common\sanitizer_common.cpp ^ ..\..\sanitizer_common\sanitizer_flags.cpp ^ diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh --- a/compiler-rt/lib/tsan/go/buildgo.sh +++ b/compiler-rt/lib/tsan/go/buildgo.sh @@ -19,6 +19,7 @@ ../rtl/tsan_stack_trace.cpp ../rtl/tsan_suppressions.cpp ../rtl/tsan_sync.cpp + ../rtl/tsan_vector_clock.cpp ../../sanitizer_common/sanitizer_allocator.cpp ../../sanitizer_common/sanitizer_common.cpp ../../sanitizer_common/sanitizer_common_libcdep.cpp diff --git a/compiler-rt/lib/tsan/go/tsan_go.cpp b/compiler-rt/lib/tsan/go/tsan_go.cpp --- a/compiler-rt/lib/tsan/go/tsan_go.cpp +++ b/compiler-rt/lib/tsan/go/tsan_go.cpp @@ -214,7 +214,7 @@ } void __tsan_free(uptr p, uptr sz) { - ctx->metamap.FreeRange(get_cur_proc(), p, sz); + ctx->metamap.FreeRange(get_cur_proc(), p, sz, false); } void __tsan_go_start(ThreadState *parent, ThreadState **pthr, void *pc) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h --- a/compiler-rt/lib/tsan/rtl/tsan_defs.h +++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h @@ -63,6 +63,13 @@ constexpr uptr kEpochBits = 14; constexpr Epoch kEpochZero = static_cast(0); constexpr Epoch kEpochOver = static_cast(1 << kEpochBits); +constexpr Epoch kEpochLast = static_cast((1 << kEpochBits) - 1); + +inline Epoch EpochInc(Epoch epoch) { + return static_cast(static_cast(epoch) + 1); +} + +inline bool EpochOverflow(Epoch epoch) { return epoch == kEpochOver; } const int kClkBits = 42; const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1; @@ -107,7 +114,7 @@ const uptr kShadowCell = 8; // Single shadow value. 
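The tsan_defs.h hunk above introduces kEpochLast, EpochInc() and EpochOverflow() next to the existing kEpochZero/kEpochOver constants. A minimal self-contained sketch of the intended 14-bit epoch arithmetic; the uint16_t underlying type stands in for tsan's u16 and is an assumption based on the surrounding definitions:

#include <cassert>
#include <cstdint>

// Assumed to mirror tsan_defs.h: a 14-bit epoch held in a scoped enum.
enum class Epoch : uint16_t {};
constexpr uintptr_t kEpochBits = 14;
constexpr Epoch kEpochZero = static_cast<Epoch>(0);
constexpr Epoch kEpochOver = static_cast<Epoch>(1 << kEpochBits);
constexpr Epoch kEpochLast = static_cast<Epoch>((1 << kEpochBits) - 1);

inline Epoch EpochInc(Epoch epoch) {
  return static_cast<Epoch>(static_cast<uint16_t>(epoch) + 1);
}

inline bool EpochOverflow(Epoch epoch) { return epoch == kEpochOver; }

int main() {
  // Incrementing the last representable epoch yields kEpochOver,
  // which callers are expected to detect and handle with a reset.
  Epoch e = kEpochLast;
  assert(!EpochOverflow(e));
  e = EpochInc(e);
  assert(EpochOverflow(e));
  // kEpochZero is a valid starting point again after a reset.
  e = kEpochZero;
  assert(!EpochOverflow(e));
  return 0;
}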
-typedef u64 RawShadow; +enum class RawShadow : u32 {}; const uptr kShadowSize = sizeof(RawShadow); // Shadow memory is kShadowMultiplier times larger than user memory. @@ -184,10 +191,13 @@ struct Processor; struct ThreadState; class ThreadContext; +struct TidSlot; struct Context; struct ReportStack; class ReportDesc; class RegionAlloc; +struct Trace; +struct TracePart; typedef uptr AccessType; @@ -198,6 +208,8 @@ kAccessVptr = 1 << 2, // read or write of an object virtual table pointer kAccessFree = 1 << 3, // synthetic memory access during memory freeing kAccessExternalPC = 1 << 4, // access PC can have kExternalPCBit set + kAccessCheckOnly = 1 << 5, // check for races, but don't store + kAccessNoRodata = 1 << 6, // don't check for .rodata marker }; // Descriptor of user's memory block. @@ -219,15 +231,18 @@ // as 16-bit values, see tsan_defs.h. }; -enum MutexType { - MutexTypeTrace = MutexLastCommon, - MutexTypeReport, +enum { + MutexTypeReport = MutexLastCommon, MutexTypeSyncVar, MutexTypeAnnotations, MutexTypeAtExit, MutexTypeFired, MutexTypeRacy, MutexTypeGlobalProc, + MutexTypeTrace, + MutexTypeSlot, + MutexTypeSlots, + MutexTypeMultiSlot, }; } // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h --- a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h +++ b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h @@ -104,6 +104,15 @@ return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T); } + template + void ForEach(Func func) { + SpinMutexLock lock(&mtx_); + uptr fillpos = atomic_load_relaxed(&fillpos_); + for (uptr l1 = 0; l1 < fillpos; l1++) { + for (IndexT l2 = l1 == 0 ? 1 : 0; l2 < kL2Size; l2++) func(&map_[l1][l2]); + } + } + private: T *map_[kL1Size]; SpinMutex mtx_; diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp @@ -110,12 +110,6 @@ if (common_flags()->help) parser.PrintFlagDescriptions(); - if (f->history_size < 0 || f->history_size > 7) { - Printf("ThreadSanitizer: incorrect value for history_size" - " (must be [0..7])\n"); - Die(); - } - if (f->io_sync < 0 || f->io_sync > 2) { Printf("ThreadSanitizer: incorrect value for io_sync" " (must be [0..2])\n"); diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.inc b/compiler-rt/lib/tsan/rtl/tsan_flags.inc --- a/compiler-rt/lib/tsan/rtl/tsan_flags.inc +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.inc @@ -59,14 +59,10 @@ "Stops on start until __tsan_resume() is called (for debugging).") TSAN_FLAG(bool, running_on_valgrind, false, "Controls whether RunningOnValgrind() returns true or false.") -// There are a lot of goroutines in Go, so we use smaller history. TSAN_FLAG( - int, history_size, SANITIZER_GO ? 1 : 3, - "Per-thread history size, controls how many previous memory accesses " - "are remembered per thread. Possible values are [0..7]. " - "history_size=0 amounts to 32K memory accesses. Each next value doubles " - "the amount of memory accesses, up to history_size=7 that amounts to " - "4M memory accesses. The default value is 2 (128K memory accesses).") + uptr, history_size, 0, + "Per-thread history size," + " controls how many extra previous memory accesses are remembered per thread.") TSAN_FLAG(int, io_sync, 1, "Controls level of synchronization implied by IO operations. 
" "0 - no synchronization " diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -1967,6 +1967,7 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire, int sig, __sanitizer_siginfo *info, void *uctx) { + CHECK(thr->slot); __sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions; if (acquire) Acquire(thr, 0, (uptr)&sigactions[sig]); @@ -2254,7 +2255,7 @@ }; static bool IsAppNotRodata(uptr addr) { - return IsAppMem(addr) && *MemToShadow(addr) != kShadowRodata; + return IsAppMem(addr) && *MemToShadow(addr) != Shadow::kRodata; } static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size, diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp @@ -235,8 +235,9 @@ T v = NoTsanAtomicLoad(a, mo); SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a); if (s) { - ReadLock l(&s->mtx); - AcquireImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + ReadLock lock(&s->mtx); + thr->clock.Acquire(s->clock); // Re-read under sync mutex because we need a consistent snapshot // of the value and the clock we acquire. v = NoTsanAtomicLoad(a, mo); @@ -270,14 +271,14 @@ NoTsanAtomicStore(a, v, mo); return; } - __sync_synchronize(); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseStoreImpl(thr, pc, &s->clock); - NoTsanAtomicStore(a, v, mo); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + Lock lock(&s->mtx); + thr->clock.ReleaseStore(&s->clock); + NoTsanAtomicStore(a, v, mo); + } + IncrementEpoch(thr); } template @@ -285,18 +286,21 @@ MemoryAccess(thr, pc, (uptr)a, AccessSize(), kAccessWrite | kAccessAtomic); if (LIKELY(mo == mo_relaxed)) return F(a, v); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. 
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - if (IsAcqRelOrder(mo)) - AcquireReleaseImpl(thr, pc, &s->clock); - else if (IsReleaseOrder(mo)) - ReleaseImpl(thr, pc, &s->clock); - else if (IsAcquireOrder(mo)) - AcquireImpl(thr, pc, &s->clock); - return F(a, v); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + RWLock lock(&s->mtx, IsReleaseOrder(mo)); + if (IsAcqRelOrder(mo)) + thr->clock.ReleaseAcquire(&s->clock); + else if (IsReleaseOrder(mo)) + thr->clock.Release(&s->clock); + else if (IsAcquireOrder(mo)) + thr->clock.Acquire(s->clock); + v = F(a, v); + } + if (IsReleaseOrder(mo)) + IncrementEpoch(thr); + return v; } template @@ -416,27 +420,28 @@ *c = pr; return false; } - + SlotLocker locker(thr); bool release = IsReleaseOrder(mo); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - RWLock l(&s->mtx, release); - T cc = *c; - T pr = func_cas(a, cc, v); - bool success = pr == cc; - if (!success) { - *c = pr; - mo = fmo; + bool success; + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + RWLock lock(&s->mtx, release); + T cc = *c; + T pr = func_cas(a, cc, v); + success = pr == cc; + if (!success) { + *c = pr; + mo = fmo; + } + if (success && IsAcqRelOrder(mo)) + thr->clock.ReleaseAcquire(&s->clock); + else if (success && IsReleaseOrder(mo)) + thr->clock.Release(&s->clock); + else if (IsAcquireOrder(mo)) + thr->clock.Acquire(s->clock); } - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - - if (success && IsAcqRelOrder(mo)) - AcquireReleaseImpl(thr, pc, &s->clock); - else if (success && IsReleaseOrder(mo)) - ReleaseImpl(thr, pc, &s->clock); - else if (IsAcquireOrder(mo)) - AcquireImpl(thr, pc, &s->clock); + if (success && release) + IncrementEpoch(thr); return success; } diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp @@ -106,7 +106,7 @@ DCHECK_GE(ptr, jctx->heap_begin); DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size); - ctx->metamap.FreeRange(thr->proc(), ptr, size); + ctx->metamap.FreeRange(thr->proc(), ptr, size, false); } void __tsan_java_move(jptr src, jptr dst, jptr size) { @@ -133,7 +133,7 @@ // support that anymore as it contains addresses of accesses. RawShadow *d = MemToShadow(dst); RawShadow *dend = MemToShadow(dst + size); - internal_memset(d, 0, (dend - d) * sizeof(*d)); + ShadowSet(d, dend, Shadow::kEmpty); } jptr __tsan_java_find(jptr *from_ptr, jptr to) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp @@ -219,8 +219,17 @@ void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) { DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p); + // Note: this can run before thread initialization/after finalization. + // As a result this is not necessarily synchronized with DoReset, + // which iterates over and resets all sync objects, + // but it is fine to create new MBlocks in this context. ctx->metamap.AllocBlock(thr, pc, p, sz); - if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited) + // If this runs before thread initialization/after finalization + // and we don't have trace initialized, we can't imitate writes. 
+ // In such case just reset the shadow range, it is fine since + // it affects only a small fraction of special objects. + if (write && thr->ignore_reads_and_writes == 0 && + atomic_load_relaxed(&thr->trace_pos)) MemoryRangeImitateWrite(thr, pc, (uptr)p, sz); else MemoryResetRange(thr, pc, (uptr)p, sz); @@ -228,9 +237,16 @@ void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) { CHECK_NE(p, (void*)0); - uptr sz = ctx->metamap.FreeBlock(thr->proc(), p); + if (!thr->slot) { + // Very early/late in thread lifetime, or during fork. + UNUSED uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, false); + DPrintf("#%d: free(0x%zx, %zu) (no slot)\n", thr->tid, p, sz); + return; + } + SlotLocker locker(thr); + uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, true); DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz); - if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited) + if (write && thr->ignore_reads_and_writes == 0) MemoryRangeFreed(thr, pc, (uptr)p, sz); } @@ -393,8 +409,6 @@ void __tsan_on_thread_idle() { ThreadState *thr = cur_thread(); - thr->clock.ResetCached(&thr->proc()->clock_cache); - thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache); allocator()->SwallowCache(&thr->proc()->alloc_cache); internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache); ctx->metamap.OnProcIdle(thr->proc()); diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h --- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h +++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h @@ -25,8 +25,6 @@ struct Desc { uptr addr; StackID stack_id; - u64 id; - u64 epoch; u32 seq; u32 count; bool write; @@ -40,10 +38,7 @@ }; MutexSet(); - // The 'id' is obtained from SyncVar::GetId(). - void Add(u64 id, bool write, u64 epoch); - void Del(u64 id, bool write); - void Remove(u64 id); // Removes the mutex completely (if it's destroyed). + void Reset(); void AddAddr(uptr addr, StackID stack_id, bool write); void DelAddr(uptr addr, bool destroy = false); uptr Size() const; @@ -82,9 +77,7 @@ // in different goroutine). #if SANITIZER_GO MutexSet::MutexSet() {} -void MutexSet::Add(u64 id, bool write, u64 epoch) {} -void MutexSet::Del(u64 id, bool write) {} -void MutexSet::Remove(u64 id) {} +void MutexSet::Reset() {} void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {} void MutexSet::DelAddr(uptr addr, bool destroy) {} uptr MutexSet::Size() const { return 0; } diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp @@ -19,57 +19,7 @@ MutexSet::MutexSet() { } -void MutexSet::Add(u64 id, bool write, u64 epoch) { - // Look up existing mutex with the same id. - for (uptr i = 0; i < size_; i++) { - if (descs_[i].id == id) { - descs_[i].count++; - descs_[i].epoch = epoch; - return; - } - } - // On overflow, find the oldest mutex and drop it. - if (size_ == kMaxSize) { - u64 minepoch = (u64)-1; - u64 mini = (u64)-1; - for (uptr i = 0; i < size_; i++) { - if (descs_[i].epoch < minepoch) { - minepoch = descs_[i].epoch; - mini = i; - } - } - RemovePos(mini); - CHECK_EQ(size_, kMaxSize - 1); - } - // Add new mutex descriptor. 
- descs_[size_].addr = 0; - descs_[size_].stack_id = kInvalidStackID; - descs_[size_].id = id; - descs_[size_].write = write; - descs_[size_].epoch = epoch; - descs_[size_].seq = seq_++; - descs_[size_].count = 1; - size_++; -} - -void MutexSet::Del(u64 id, bool write) { - for (uptr i = 0; i < size_; i++) { - if (descs_[i].id == id) { - if (--descs_[i].count == 0) - RemovePos(i); - return; - } - } -} - -void MutexSet::Remove(u64 id) { - for (uptr i = 0; i < size_; i++) { - if (descs_[i].id == id) { - RemovePos(i); - return; - } - } -} +void MutexSet::Reset() { internal_memset(this, 0, sizeof(*this)); } void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) { // Look up existing mutex with the same id. @@ -93,9 +43,7 @@ // Add new mutex descriptor. descs_[size_].addr = addr; descs_[size_].stack_id = stack_id; - descs_[size_].id = 0; descs_[size_].write = write; - descs_[size_].epoch = 0; descs_[size_].seq = seq_++; descs_[size_].count = 1; size_++; diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -18,8 +18,8 @@ # error "Only 64-bit is supported" #endif +#include "sanitizer_common/sanitizer_common.h" #include "tsan_defs.h" -#include "tsan_trace.h" namespace __tsan { @@ -45,9 +45,7 @@ 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) 4000 0000 0000 - 5500 0000 0000: - 5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels -5680 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 7d00 0000 0000: - +5680 0000 0000 - 7d00 0000 0000: - 7b00 0000 0000 - 7c00 0000 0000: heap 7c00 0000 0000 - 7e80 0000 0000: - 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack @@ -67,8 +65,6 @@ struct Mapping48AddressSpace { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x340000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x200000000000ull; static const uptr kHeapMemBeg = 0x7b0000000000ull; @@ -89,14 +85,12 @@ C/C++ on linux/mips64 (40-bit VMA) 0000 0000 00 - 0100 0000 00: - (4 GB) 0100 0000 00 - 0200 0000 00: main binary (4 GB) -0200 0000 00 - 2000 0000 00: - (120 GB) -2000 0000 00 - 4000 0000 00: shadow (128 GB) +0200 0000 00 - 1200 0000 00: - (120 GB) +1200 0000 00 - 4000 0000 00: shadow (128 GB) 4000 0000 00 - 5000 0000 00: metainfo (memory blocks and sync objects) (64 GB) 5000 0000 00 - aa00 0000 00: - (360 GB) aa00 0000 00 - ab00 0000 00: main binary (PIE) (4 GB) -ab00 0000 00 - b000 0000 00: - (20 GB) -b000 0000 00 - b200 0000 00: traces (8 GB) -b200 0000 00 - fe00 0000 00: - (304 GB) +ab00 0000 00 - fe00 0000 00: - (332 GB) fe00 0000 00 - ff00 0000 00: heap (4 GB) ff00 0000 00 - ff80 0000 00: - (2 GB) ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB) @@ -104,9 +98,7 @@ struct MappingMips64_40 { static const uptr kMetaShadowBeg = 0x4000000000ull; static const uptr kMetaShadowEnd = 0x5000000000ull; - static const uptr kTraceMemBeg = 0xb000000000ull; - static const uptr kTraceMemEnd = 0xb200000000ull; - static const uptr kShadowBeg = 0x2000000000ull; + static const uptr kShadowBeg = 0x1200000000ull; static const uptr kShadowEnd = 0x4000000000ull; static const uptr kHeapMemBeg = 0xfe00000000ull; static const uptr kHeapMemEnd = 0xff00000000ull; @@ -131,9 
+123,7 @@ 0400 0000 00 - 0c00 0000 00: shadow memory (32 GB) 0c00 0000 00 - 0d00 0000 00: - (4 GB) 0d00 0000 00 - 0e00 0000 00: metainfo (4 GB) -0e00 0000 00 - 0f00 0000 00: - (4 GB) -0f00 0000 00 - 0fc0 0000 00: traces (3 GB) -0fc0 0000 00 - 1000 0000 00: - +0e00 0000 00 - 1000 0000 00: - */ struct MappingAppleAarch64 { static const uptr kLoAppMemBeg = 0x0100000000ull; @@ -144,13 +134,11 @@ static const uptr kShadowEnd = 0x0c00000000ull; static const uptr kMetaShadowBeg = 0x0d00000000ull; static const uptr kMetaShadowEnd = 0x0e00000000ull; - static const uptr kTraceMemBeg = 0x0f00000000ull; - static const uptr kTraceMemEnd = 0x0fc0000000ull; static const uptr kHiAppMemBeg = 0x0fc0000000ull; static const uptr kHiAppMemEnd = 0x0fc0000000ull; static const uptr kShadowMsk = 0x0ull; static const uptr kShadowXor = 0x0ull; - static const uptr kShadowAdd = 0x0ull; + static const uptr kShadowAdd = 0x0200000000ull; static const uptr kVdsoBeg = 0x7000000000000000ull; static const uptr kMidAppMemBeg = 0; static const uptr kMidAppMemEnd = 0; @@ -159,29 +147,25 @@ /* C/C++ on linux/aarch64 (39-bit VMA) 0000 0010 00 - 0100 0000 00: main binary -0100 0000 00 - 0800 0000 00: - -0800 0000 00 - 2000 0000 00: shadow memory +0100 0000 00 - 0400 0000 00: - +0400 0000 00 - 2000 0000 00: shadow memory 2000 0000 00 - 3100 0000 00: - 3100 0000 00 - 3400 0000 00: metainfo 3400 0000 00 - 5500 0000 00: - 5500 0000 00 - 5600 0000 00: main binary (PIE) -5600 0000 00 - 6000 0000 00: - -6000 0000 00 - 6200 0000 00: traces -6200 0000 00 - 7d00 0000 00: - +5600 0000 00 - 7c00 0000 00: - 7c00 0000 00 - 7d00 0000 00: heap 7d00 0000 00 - 7fff ffff ff: modules and main thread stack */ struct MappingAarch64_39 { static const uptr kLoAppMemBeg = 0x0000001000ull; static const uptr kLoAppMemEnd = 0x0100000000ull; - static const uptr kShadowBeg = 0x0800000000ull; + static const uptr kShadowBeg = 0x0400000000ull; static const uptr kShadowEnd = 0x2000000000ull; static const uptr kMetaShadowBeg = 0x3100000000ull; static const uptr kMetaShadowEnd = 0x3400000000ull; static const uptr kMidAppMemBeg = 0x5500000000ull; - static const uptr kMidAppMemEnd = 0x5600000000ull; - static const uptr kTraceMemBeg = 0x6000000000ull; - static const uptr kTraceMemEnd = 0x6200000000ull; + static const uptr kMidAppMemEnd = 0x5600000000ull; static const uptr kHeapMemBeg = 0x7c00000000ull; static const uptr kHeapMemEnd = 0x7d00000000ull; static const uptr kHiAppMemBeg = 0x7e00000000ull; @@ -195,15 +179,13 @@ /* C/C++ on linux/aarch64 (42-bit VMA) 00000 0010 00 - 01000 0000 00: main binary -01000 0000 00 - 10000 0000 00: - -10000 0000 00 - 20000 0000 00: shadow memory +01000 0000 00 - 08000 0000 00: - +08000 0000 00 - 20000 0000 00: shadow memory 20000 0000 00 - 26000 0000 00: - 26000 0000 00 - 28000 0000 00: metainfo 28000 0000 00 - 2aa00 0000 00: - 2aa00 0000 00 - 2ab00 0000 00: main binary (PIE) -2ab00 0000 00 - 36200 0000 00: - -36200 0000 00 - 36240 0000 00: traces -36240 0000 00 - 3e000 0000 00: - +2ab00 0000 00 - 3e000 0000 00: - 3e000 0000 00 - 3f000 0000 00: heap 3f000 0000 00 - 3ffff ffff ff: modules and main thread stack */ @@ -211,14 +193,12 @@ static const uptr kBroken = kBrokenReverseMapping; static const uptr kLoAppMemBeg = 0x00000001000ull; static const uptr kLoAppMemEnd = 0x01000000000ull; - static const uptr kShadowBeg = 0x10000000000ull; + static const uptr kShadowBeg = 0x08000000000ull; static const uptr kShadowEnd = 0x20000000000ull; static const uptr kMetaShadowBeg = 0x26000000000ull; static const uptr kMetaShadowEnd = 
0x28000000000ull; static const uptr kMidAppMemBeg = 0x2aa00000000ull; - static const uptr kMidAppMemEnd = 0x2ab00000000ull; - static const uptr kTraceMemBeg = 0x36200000000ull; - static const uptr kTraceMemEnd = 0x36400000000ull; + static const uptr kMidAppMemEnd = 0x2ab00000000ull; static const uptr kHeapMemBeg = 0x3e000000000ull; static const uptr kHeapMemEnd = 0x3f000000000ull; static const uptr kHiAppMemBeg = 0x3f000000000ull; @@ -232,14 +212,12 @@ struct MappingAarch64_48 { static const uptr kLoAppMemBeg = 0x0000000001000ull; static const uptr kLoAppMemEnd = 0x0000200000000ull; - static const uptr kShadowBeg = 0x0002000000000ull; + static const uptr kShadowBeg = 0x0001000000000ull; static const uptr kShadowEnd = 0x0004000000000ull; static const uptr kMetaShadowBeg = 0x0005000000000ull; static const uptr kMetaShadowEnd = 0x0006000000000ull; static const uptr kMidAppMemBeg = 0x0aaaa00000000ull; - static const uptr kMidAppMemEnd = 0x0aaaf00000000ull; - static const uptr kTraceMemBeg = 0x0f06000000000ull; - static const uptr kTraceMemEnd = 0x0f06200000000ull; + static const uptr kMidAppMemEnd = 0x0aaaf00000000ull; static const uptr kHeapMemBeg = 0x0ffff00000000ull; static const uptr kHeapMemEnd = 0x0ffff00000000ull; static const uptr kHiAppMemBeg = 0x0ffff00000000ull; @@ -257,9 +235,7 @@ 0001 0000 0000 - 0b00 0000 0000: shadow 0b00 0000 0000 - 0b00 0000 0000: - 0b00 0000 0000 - 0d00 0000 0000: metainfo (memory blocks and sync objects) -0d00 0000 0000 - 0d00 0000 0000: - -0d00 0000 0000 - 0f00 0000 0000: traces -0f00 0000 0000 - 0f00 0000 0000: - +0d00 0000 0000 - 0f00 0000 0000: - 0f00 0000 0000 - 0f50 0000 0000: heap 0f50 0000 0000 - 0f60 0000 0000: - 0f60 0000 0000 - 1000 0000 0000: modules and main thread stack @@ -269,8 +245,6 @@ kBrokenMapping | kBrokenReverseMapping | kBrokenLinearity; static const uptr kMetaShadowBeg = 0x0b0000000000ull; static const uptr kMetaShadowEnd = 0x0d0000000000ull; - static const uptr kTraceMemBeg = 0x0d0000000000ull; - static const uptr kTraceMemEnd = 0x0f0000000000ull; static const uptr kShadowBeg = 0x000100000000ull; static const uptr kShadowEnd = 0x0b0000000000ull; static const uptr kLoAppMemBeg = 0x000000000100ull; @@ -295,8 +269,7 @@ 1000 0000 0000 - 1000 0000 0000: - 1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects) 2000 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 2200 0000 0000: traces -2200 0000 0000 - 3d00 0000 0000: - +1200 0000 0000 - 3d00 0000 0000: - 3d00 0000 0000 - 3e00 0000 0000: heap 3e00 0000 0000 - 3e80 0000 0000: - 3e80 0000 0000 - 4000 0000 0000: modules and main thread stack @@ -304,8 +277,6 @@ struct MappingPPC64_46 { static const uptr kMetaShadowBeg = 0x100000000000ull; static const uptr kMetaShadowEnd = 0x200000000000ull; - static const uptr kTraceMemBeg = 0x200000000000ull; - static const uptr kTraceMemEnd = 0x220000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x100000000000ull; static const uptr kHeapMemBeg = 0x3d0000000000ull; @@ -329,9 +300,7 @@ 0100 0000 0000 - 1000 0000 0000: shadow 1000 0000 0000 - 1000 0000 0000: - 1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects) -2000 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 2200 0000 0000: traces -2200 0000 0000 - 7d00 0000 0000: - +2000 0000 0000 - 7d00 0000 0000: - 7d00 0000 0000 - 7e00 0000 0000: heap 7e00 0000 0000 - 7e80 0000 0000: - 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack @@ -339,8 +308,6 @@ struct MappingPPC64_47 { static const uptr kMetaShadowBeg = 
0x100000000000ull; static const uptr kMetaShadowEnd = 0x200000000000ull; - static const uptr kTraceMemBeg = 0x200000000000ull; - static const uptr kTraceMemEnd = 0x220000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x100000000000ull; static const uptr kHeapMemBeg = 0x7d0000000000ull; @@ -362,21 +329,17 @@ While the kernel provides a 64-bit address space, we have to restrict ourselves to 48 bits due to how e.g. SyncVar::GetId() works. 0000 0000 1000 - 0e00 0000 0000: binary, modules, stacks - 14 TiB -0e00 0000 0000 - 4000 0000 0000: - -4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) +0e00 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) 8000 0000 0000 - 9000 0000 0000: - 9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app) -9800 0000 0000 - a000 0000 0000: - -a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads) -b000 0000 0000 - be00 0000 0000: - +9800 0000 0000 - be00 0000 0000: - be00 0000 0000 - c000 0000 0000: heap - 2TiB (max supported by the allocator) */ struct MappingS390x { static const uptr kMetaShadowBeg = 0x900000000000ull; static const uptr kMetaShadowEnd = 0x980000000000ull; - static const uptr kTraceMemBeg = 0xa00000000000ull; - static const uptr kTraceMemEnd = 0xb00000000000ull; - static const uptr kShadowBeg = 0x400000000000ull; + static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x800000000000ull; static const uptr kHeapMemBeg = 0xbe0000000000ull; static const uptr kHeapMemEnd = 0xc00000000000ull; @@ -400,16 +363,12 @@ 2000 0000 0000 - 2380 0000 0000: shadow 2380 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 8000 0000 0000: - */ struct MappingGo48 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x238000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -432,7 +391,7 @@ 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 0100 0000 0000: - 0100 0000 0000 - 0500 0000 0000: shadow -0500 0000 0000 - 0700 0000 0000: traces +0500 0000 0000 - 0700 0000 0000: - 0700 0000 0000 - 0770 0000 0000: metainfo (memory blocks and sync objects) 07d0 0000 0000 - 8000 0000 0000: - */ @@ -440,8 +399,6 @@ struct MappingGoWindows { static const uptr kMetaShadowBeg = 0x070000000000ull; static const uptr kMetaShadowEnd = 0x077000000000ull; - static const uptr kTraceMemBeg = 0x050000000000ull; - static const uptr kTraceMemEnd = 0x070000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x050000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -466,16 +423,12 @@ 2000 0000 0000 - 2380 0000 0000: shadow 2380 0000 0000 - 2400 0000 0000: - 2400 0000 0000 - 3400 0000 0000: metainfo (memory blocks and sync objects) -3400 0000 0000 - 3600 0000 0000: - -3600 0000 0000 - 3800 0000 0000: traces -3800 0000 0000 - 4000 0000 0000: - +3400 0000 0000 - 4000 0000 0000: - */ struct MappingGoPPC64_46 { static const uptr kMetaShadowBeg = 0x240000000000ull; static const uptr kMetaShadowEnd = 0x340000000000ull; - static const uptr kTraceMemBeg = 
0x360000000000ull; - static const uptr kTraceMemEnd = 0x380000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x238000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -500,16 +453,12 @@ 2000 0000 0000 - 3000 0000 0000: shadow 3000 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 8000 0000 0000: - */ struct MappingGoPPC64_47 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x300000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -534,15 +483,11 @@ 2000 0000 0000 - 3000 0000 0000: shadow 3000 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 8000 0000 0000: - */ struct MappingGoAarch64 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x300000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -568,15 +513,11 @@ 2000 0000 0000 - 3000 0000 0000: shadow 3000 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 6000 0000 0000: - -6000 0000 0000 - 6200 0000 0000: traces -6200 0000 0000 - 8000 0000 0000: - +3200 0000 0000 - 8000 0000 0000: - */ struct MappingGoMips64_47 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; - static const uptr kTraceMemBeg = 0x600000000000ull; - static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x300000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -600,14 +541,10 @@ 4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) 8000 0000 0000 - 9000 0000 0000: - 9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app) -9800 0000 0000 - a000 0000 0000: - -a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads) */ struct MappingGoS390x { static const uptr kMetaShadowBeg = 0x900000000000ull; static const uptr kMetaShadowEnd = 0x980000000000ull; - static const uptr kTraceMemBeg = 0xa00000000000ull; - static const uptr kTraceMemEnd = 0xb00000000000ull; static const uptr kShadowBeg = 0x400000000000ull; static const uptr kShadowEnd = 0x800000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -715,8 +652,6 @@ kShadowEnd, kMetaShadowBeg, kMetaShadowEnd, - kTraceMemBeg, - kTraceMemEnd, kVdsoBeg, }; @@ -750,10 +685,6 @@ return Mapping::kMetaShadowBeg; case kMetaShadowEnd: return Mapping::kMetaShadowEnd; - case kTraceMemBeg: - return Mapping::kTraceMemBeg; - case kTraceMemEnd: - return Mapping::kTraceMemEnd; } Die(); } @@ -792,11 +723,6 @@ ALWAYS_INLINE uptr MetaShadowEnd(void) { return SelectMapping(kMetaShadowEnd); } -ALWAYS_INLINE -uptr TraceMemBeg(void) { return 
SelectMapping(kTraceMemBeg); } -ALWAYS_INLINE -uptr TraceMemEnd(void) { return SelectMapping(kTraceMemEnd); } - struct IsAppMemImpl { template static bool Apply(uptr mem) { @@ -934,43 +860,10 @@ return SelectMapping(addr); } -// The additional page is to catch shadow stack overflow as paging fault. -// Windows wants 64K alignment for mmaps. -const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace) - + (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1); - -struct GetThreadTraceImpl { - template - static uptr Apply(uptr tid) { - uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize; - DCHECK_LT(p, Mapping::kTraceMemEnd); - return p; - } -}; - -ALWAYS_INLINE -uptr GetThreadTrace(int tid) { return SelectMapping(tid); } - -struct GetThreadTraceHeaderImpl { - template - static uptr Apply(uptr tid) { - uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize + - kTraceSize * sizeof(Event); - DCHECK_LT(p, Mapping::kTraceMemEnd); - return p; - } -}; - -ALWAYS_INLINE -uptr GetThreadTraceHeader(int tid) { - return SelectMapping(tid); -} - void InitializePlatform(); void InitializePlatformEarly(); void CheckAndProtect(); void InitializeShadowMemoryPlatform(); -void FlushShadowMemory(); void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns); int ExtractResolvFDs(void *state, int *fds, int nfd); int ExtractRecvmsgFDs(void *msg, int *fds, int nfd); diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -94,7 +94,6 @@ MemMeta, MemFile, MemMmap, - MemTrace, MemHeap, MemOther, MemCount, @@ -112,8 +111,6 @@ mem[file ? MemFile : MemMmap] += rss; else if (p >= HeapMemBeg() && p < HeapMemEnd()) mem[MemHeap] += rss; - else if (p >= TraceMemBeg() && p < TraceMemEnd()) - mem[MemTrace] += rss; else mem[MemOther] += rss; } @@ -126,42 +123,33 @@ StackDepotStats stacks = StackDepotGetStats(); uptr nthread, nlive; ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive); + uptr trace_mem; + { + Lock l(&ctx->slot_mtx); + trace_mem = ctx->trace_part_total_allocated * sizeof(TracePart); + } uptr internal_stats[AllocatorStatCount]; internal_allocator()->GetStats(internal_stats); // All these are allocated from the common mmap region. 
- mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated + - internal_stats[AllocatorStatMapped]; + mem[MemMmap] -= meta.mem_block + meta.sync_obj + trace_mem + + stacks.allocated + internal_stats[AllocatorStatMapped]; if (s64(mem[MemMmap]) < 0) mem[MemMmap] = 0; internal_snprintf( buf, buf_size, - "%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd" - " trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu" - " stacks=%zd[%zd] nthr=%zd/%zd\n", - uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20, - mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20, - mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20, + "==%zu== %llus [%zu]: RSS %zd MB: shadow:%zd meta:%zd file:%zd" + " mmap:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu" + " trace:%zu stacks=%zd threads=%zu/%zu\n", + internal_getpid(), uptime_ns / (1000 * 1000 * 1000), ctx->global_epoch, + mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20, + mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemHeap] >> 20, mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20, - meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20, - stacks.n_uniq_ids, nlive, nthread); -} - -# if SANITIZER_LINUX -void FlushShadowMemoryCallback( - const SuspendedThreadsList &suspended_threads_list, - void *argument) { - ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd()); -} -#endif - -void FlushShadowMemory() { -#if SANITIZER_LINUX - StopTheWorld(FlushShadowMemoryCallback, 0); -#endif + meta.mem_block >> 20, meta.sync_obj >> 20, trace_mem >> 20, + stacks.allocated >> 20, nlive, nthread); } #if !SANITIZER_GO -// Mark shadow for .rodata sections with the special kShadowRodata marker. +// Mark shadow for .rodata sections with the special Shadow::kRodata marker. // Accesses to .rodata can't race, so this saves time, memory and trace space. static void MapRodata() { // First create temp file. @@ -182,13 +170,13 @@ return; internal_unlink(name); // Unlink it now, so that we can reuse the buffer. fd_t fd = openrv; - // Fill the file with kShadowRodata. + // Fill the file with Shadow::kRodata. const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow); InternalMmapVector marker(kMarkerSize); // volatile to prevent insertion of memset for (volatile RawShadow *p = marker.data(); p < marker.data() + kMarkerSize; p++) - *p = kShadowRodata; + *p = Shadow::kRodata; internal_write(fd, marker.data(), marker.size() * sizeof(RawShadow)); // Map the file into memory. 
uptr page = internal_mmap(0, GetPageSizeCached(), PROT_READ | PROT_WRITE, diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp @@ -112,9 +112,6 @@ } #endif -void FlushShadowMemory() { -} - static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) { vm_address_t address = start; vm_address_t end_address = end; @@ -142,10 +139,8 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { uptr shadow_res, shadow_dirty; uptr meta_res, meta_dirty; - uptr trace_res, trace_dirty; RegionMemUsage(ShadowBeg(), ShadowEnd(), &shadow_res, &shadow_dirty); RegionMemUsage(MetaShadowBeg(), MetaShadowEnd(), &meta_res, &meta_dirty); - RegionMemUsage(TraceMemBeg(), TraceMemEnd(), &trace_res, &trace_dirty); #if !SANITIZER_GO uptr low_res, low_dirty; @@ -166,7 +161,6 @@ buf, buf_size, "shadow (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" "meta (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" - "traces (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" # if !SANITIZER_GO "low app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" "high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" @@ -179,7 +173,6 @@ "------------------------------\n", ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024, MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024, - TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024, # if !SANITIZER_GO LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024, HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024, diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp @@ -113,24 +113,20 @@ # if defined(__aarch64__) && defined(__APPLE__) && SANITIZER_IOS ProtectRange(HeapMemEnd(), ShadowBeg()); ProtectRange(ShadowEnd(), MetaShadowBeg()); - ProtectRange(MetaShadowEnd(), TraceMemBeg()); -#else + ProtectRange(MetaShadowEnd(), HeapMemBeg()); +# else ProtectRange(LoAppMemEnd(), ShadowBeg()); ProtectRange(ShadowEnd(), MetaShadowBeg()); if (MidAppMemBeg()) { ProtectRange(MetaShadowEnd(), MidAppMemBeg()); - ProtectRange(MidAppMemEnd(), TraceMemBeg()); + ProtectRange(MidAppMemEnd(), HeapMemBeg()); } else { - ProtectRange(MetaShadowEnd(), TraceMemBeg()); + ProtectRange(MetaShadowEnd(), HeapMemBeg()); } - // Memory for traces is mapped lazily in MapThreadTrace. - // Protect the whole range for now, so that user does not map something here. - ProtectRange(TraceMemBeg(), TraceMemEnd()); - ProtectRange(TraceMemEnd(), HeapMemBeg()); ProtectRange(HeapEnd(), HiAppMemBeg()); -#endif +# endif -#if defined(__s390x__) +# if defined(__s390x__) // Protect the rest of the address space. 
const uptr user_addr_max_l4 = 0x0020000000000000ull; const uptr user_addr_max_l5 = 0xfffffffffffff000ull; diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp @@ -20,9 +20,6 @@ namespace __tsan { -void FlushShadowMemory() { -} - void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {} void InitializePlatformEarly() { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -38,6 +38,7 @@ #include "tsan_defs.h" #include "tsan_flags.h" #include "tsan_ignoreset.h" +#include "tsan_ilist.h" #include "tsan_mman.h" #include "tsan_mutexset.h" #include "tsan_platform.h" @@ -46,6 +47,7 @@ #include "tsan_stack_trace.h" #include "tsan_sync.h" #include "tsan_trace.h" +#include "tsan_vector_clock.h" #if SANITIZER_WORDSIZE != 64 # error "ThreadSanitizer is supported only on 64-bit platforms" @@ -116,7 +118,6 @@ #endif DenseSlabAllocCache block_cache; DenseSlabAllocCache sync_cache; - DenseSlabAllocCache clock_cache; DDPhysicalThread *dd_pt; }; @@ -130,30 +131,56 @@ }; #endif +struct TidEpoch { + Tid tid; + Epoch epoch; +}; + +struct TidSlot { + Mutex mtx; + Sid sid; + atomic_uint32_t raw_epoch; + ThreadState *thr; + Vector journal; + INode node; + + Epoch epoch() const { + return static_cast(atomic_load(&raw_epoch, memory_order_relaxed)); + } + + void SetEpoch(Epoch v) { + atomic_store(&raw_epoch, static_cast(v), memory_order_relaxed); + } + + TidSlot(); +} ALIGNED(SANITIZER_CACHE_LINE_SIZE); + // This struct is stored in TLS. struct ThreadState { FastState fast_state; - // Synch epoch represents the threads's epoch before the last synchronization - // action. It allows to reduce number of shadow state updates. - // For example, fast_synch_epoch=100, last write to addr X was at epoch=150, - // if we are processing write to X from the same thread at epoch=200, - // we do nothing, because both writes happen in the same 'synch epoch'. - // That is, if another memory access does not race with the former write, - // it does not race with the latter as well. - // QUESTION: can we can squeeze this into ThreadState::Fast? - // E.g. ThreadState::Fast is a 44-bit, 32 are taken by synch_epoch and 12 are - // taken by epoch between synchs. - // This way we can save one load from tls. - u64 fast_synch_epoch; + int ignore_sync; +#if !SANITIZER_GO + int ignore_interceptors; +#endif + uptr *shadow_stack_pos; + + // Current position in tctx->trace.Back()->events (Event*). + atomic_uintptr_t trace_pos; + // PC of the last memory access, used to compute PC deltas in the trace. + uptr trace_prev_pc; + // Technically `current` should be a separate THREADLOCAL variable; // but it is placed here in order to share cache line with previous fields. ThreadState* current; + + atomic_sint32_t pending_signals; + + VectorClock clock; + // This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read. // We do not distinguish beteween ignoring reads and writes // for better performance. int ignore_reads_and_writes; - atomic_sint32_t pending_signals; - int ignore_sync; int suppress_reports; // Go does not support ignores. 
#if !SANITIZER_GO @@ -162,31 +189,27 @@ #endif uptr *shadow_stack; uptr *shadow_stack_end; - uptr *shadow_stack_pos; - RawShadow *racy_shadow_addr; - RawShadow racy_state[2]; - MutexSet mset; - ThreadClock clock; #if !SANITIZER_GO Vector jmp_bufs; - int ignore_interceptors; -#endif - const Tid tid; - const int unique_id; - bool in_symbolizer; + int in_symbolizer; bool in_ignored_lib; bool is_inited; +#endif + MutexSet mset; bool is_dead; - bool is_freeing; - bool is_vptr_access; - const uptr stk_addr; - const uptr stk_size; - const uptr tls_addr; - const uptr tls_size; + const Tid tid; + uptr stk_addr; + uptr stk_size; + uptr tls_addr; + uptr tls_size; ThreadContext *tctx; DDLogicalThread *dd_lt; + TidSlot *slot; + uptr slot_epoch; + bool slot_locked; + // Current wired Processor, or nullptr. Required to handle any events. Processor *proc1; #if !SANITIZER_GO @@ -200,7 +223,7 @@ #if !SANITIZER_GO StackID last_sleep_stack_id; - ThreadClock last_sleep_clock; + VectorClock last_sleep_clock; #endif // Set in regions of runtime that must be signal-safe and fork-safe. @@ -209,16 +232,7 @@ const ReportDesc *current_report; - // Current position in tctx->trace.Back()->events (Event*). - atomic_uintptr_t trace_pos; - // PC of the last memory access, used to compute PC deltas in the trace. - uptr trace_prev_pc; - Sid sid; - Epoch epoch; - - explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch, - unsigned reuse_count, uptr stk_addr, uptr stk_size, - uptr tls_addr, uptr tls_size); + explicit ThreadState(Tid tid); } ALIGNED(SANITIZER_CACHE_LINE_SIZE); #if !SANITIZER_GO @@ -252,14 +266,9 @@ ~ThreadContext(); ThreadState *thr; StackID creation_stack_id; - SyncClock sync; - // Epoch at which the thread had started. - // If we see an event from the thread stamped by an older epoch, - // the event is from a dead thread that shared tid with this thread. - u64 epoch0; - u64 epoch1; - - v3::Trace trace; + VectorClock *sync; + uptr sync_epoch; + Trace trace; // Override superclass callbacks. void OnDead() override; @@ -319,7 +328,21 @@ Flags flags; fd_t memprof_fd; + // The last slot index (kFreeSid) is used to denote freed memory. + TidSlot slots[kThreadSlotCount - 1]; + + // Protects global_epoch, slot_queue, trace_part_recycle. Mutex slot_mtx; + // Prevents lock order inversions when we lock more than 1 slot. + Mutex multi_slot_mtx; + uptr global_epoch; // guarded by slot_mtx and by all slot mutexes + bool resetting; // global reset is in progress + IList slot_queue GUARDED_BY(slot_mtx); + IList trace_part_recycle + GUARDED_BY(slot_mtx); + uptr trace_part_total_allocated GUARDED_BY(slot_mtx); + uptr trace_part_recycle_finished GUARDED_BY(slot_mtx); + uptr trace_part_finished_excess GUARDED_BY(slot_mtx); }; extern Context *ctx; // The one and the only global runtime context. 
@@ -348,14 +371,13 @@ class ScopedReportBase { public: - void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack, - const MutexSet *mset); + void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, Tid tid, + StackTrace stack, const MutexSet *mset); void AddStack(StackTrace stack, bool suppressable = false); void AddThread(const ThreadContext *tctx, bool suppressable = false); - void AddThread(Tid unique_tid, bool suppressable = false); + void AddThread(Tid tid, bool suppressable = false); void AddUniqueTid(Tid unique_tid); - void AddMutex(const SyncVar *s); - u64 AddMutex(u64 id); + int AddMutex(uptr addr, StackID creation_stack_id); void AddLocation(uptr addr, uptr size); void AddSleep(StackID stack_id); void SetCount(int count); @@ -372,8 +394,6 @@ // at best it will cause deadlocks on internal mutexes. ScopedIgnoreInterceptors ignore_interceptors_; - void AddDeadMutex(u64 id); - ScopedReportBase(const ScopedReportBase &) = delete; void operator=(const ScopedReportBase &) = delete; }; @@ -389,8 +409,6 @@ bool ShouldReport(ThreadState *thr, ReportType typ); ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack); -void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk, - MutexSet *mset, uptr *tag = nullptr); // The stack could look like: // |
| | tag | @@ -438,7 +456,8 @@ void ForkParentAfter(ThreadState *thr, uptr pc); void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread); -void ReportRace(ThreadState *thr); +void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old, + AccessType typ); bool OutputReport(ThreadState *thr, const ScopedReport &srep); bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace); bool IsExpectedReport(uptr addr, uptr size); @@ -468,55 +487,28 @@ void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write); void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write); -void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, - int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic); -void MemoryAccessImpl(ThreadState *thr, uptr addr, - int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic, - u64 *shadow_mem, Shadow cur); -void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, - uptr size, bool is_write); +void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, + AccessType typ); void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, AccessType typ); - -const int kSizeLog1 = 0; -const int kSizeLog2 = 1; -const int kSizeLog4 = 2; -const int kSizeLog8 = 3; +// This creates 2 non-inlined specialized versions of MemoryAccessRange. +template +void MemoryAccessRangeT(ThreadState *thr, uptr pc, uptr addr, uptr size); ALWAYS_INLINE -void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, - AccessType typ) { - int size_log; - switch (size) { - case 1: - size_log = kSizeLog1; - break; - case 2: - size_log = kSizeLog2; - break; - case 4: - size_log = kSizeLog4; - break; - default: - DCHECK_EQ(size, 8); - size_log = kSizeLog8; - break; - } - bool is_write = !(typ & kAccessRead); - bool is_atomic = typ & kAccessAtomic; - if (typ & kAccessVptr) - thr->is_vptr_access = true; - if (typ & kAccessFree) - thr->is_freeing = true; - MemoryAccess(thr, pc, addr, size_log, is_write, is_atomic); - if (typ & kAccessVptr) - thr->is_vptr_access = false; - if (typ & kAccessFree) - thr->is_freeing = false; +void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, + bool is_write) { + if (size == 0) + return; + if (is_write) + MemoryAccessRangeT(thr, pc, addr, size); + else + MemoryAccessRangeT(thr, pc, addr, size); } -void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); +void ShadowSet(RawShadow *p, RawShadow *end, RawShadow v); void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size); +void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); @@ -526,9 +518,6 @@ void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc); void ThreadIgnoreSyncEnd(ThreadState *thr); -void FuncEntry(ThreadState *thr, uptr pc); -void FuncExit(ThreadState *thr); - Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached); void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, ThreadType thread_type); @@ -574,11 +563,7 @@ void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr); void ReleaseStore(ThreadState *thr, uptr pc, uptr addr); void AfterSleep(ThreadState *thr, uptr pc); -void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c); -void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c); -void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c); -void ReleaseStoreImpl(ThreadState 
*thr, uptr pc, SyncClock *c); -void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c); +void IncrementEpoch(ThreadState *thr); // The hacky call uses custom calling convention and an assembly thunk. // It is considerably faster that a normal call for the caller @@ -601,43 +586,19 @@ #define HACKY_CALL(f) f() #endif -void TraceSwitch(ThreadState *thr); -uptr TraceTopPC(ThreadState *thr); -uptr TraceSize(); -uptr TraceParts(); -Trace *ThreadTrace(Tid tid); - -extern "C" void __tsan_trace_switch(); -void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs, - EventType typ, u64 addr) { - if (!kCollectHistory) - return; - // TraceSwitch accesses shadow_stack, but it's called infrequently, - // so we check it here proactively. - DCHECK(thr->shadow_stack); - DCHECK_GE((int)typ, 0); - DCHECK_LE((int)typ, 7); - DCHECK_EQ(GetLsb(addr, kEventPCBits), addr); - u64 pos = fs.GetTracePos(); - if (UNLIKELY((pos % kTracePartSize) == 0)) { -#if !SANITIZER_GO - HACKY_CALL(__tsan_trace_switch); -#else - TraceSwitch(thr); -#endif - } - Event *trace = (Event*)GetThreadTrace(fs.tid()); - Event *evp = &trace[pos]; - Event ev = (u64)addr | ((u64)typ << kEventPCBits); - *evp = ev; -} - #if !SANITIZER_GO uptr ALWAYS_INLINE HeapEnd() { return HeapMemEnd() + PrimaryAllocator::AdditionalSize(); } #endif +void SlotAttachAndLock(ThreadState *thr) ACQUIRE(thr->slot->mtx); +void SlotDetach(ThreadState *thr); +void SlotLock(ThreadState *thr) ACQUIRE(thr->slot->mtx); +void SlotUnlock(ThreadState *thr) RELEASE(thr->slot->mtx); +void DoReset(ThreadState *thr, uptr epoch); +void FlushShadowMemory(); + ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags); void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber); void FiberSwitch(ThreadState *thr, uptr pc, ThreadState *fiber, unsigned flags); @@ -648,6 +609,53 @@ FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync }; +class SlotPairLocker { + public: + SlotPairLocker(ThreadState *thr, Sid sid); + ~SlotPairLocker(); + + private: + ThreadState *thr_; + TidSlot *slot_; +}; + +class SlotLocker { + public: + ALWAYS_INLINE + SlotLocker(ThreadState *thr, bool recursive = false) + : thr_(thr), locked_(recursive ? thr->slot_locked : false) { + if (!locked_) + SlotLock(thr_); + } + + ALWAYS_INLINE + ~SlotLocker() { + if (!locked_) + SlotUnlock(thr_); + } + + private: + ThreadState *thr_; + bool locked_; +}; + +class SlotUnlocker { + public: + SlotUnlocker(ThreadState *thr) : thr_(thr), locked_(thr->slot_locked) { + if (locked_) + SlotUnlock(thr_); + } + + ~SlotUnlocker() { + if (locked_) + SlotLock(thr_); + } + + private: + ThreadState *thr_; + bool locked_; +}; + ALWAYS_INLINE void ProcessPendingSignals(ThreadState *thr) { if (UNLIKELY(atomic_load_relaxed(&thr->pending_signals))) ProcessPendingSignalsImpl(thr); @@ -666,16 +674,19 @@ #endif } -namespace v3 { - +void TraceResetForTesting(); void TraceSwitchPart(ThreadState *thr); -bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, - uptr size, AccessType typ, VarSizeStackTrace *pstk, +void TraceSwitchPartImpl(ThreadState *thr); +bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, + AccessType typ, Tid *ptid, VarSizeStackTrace *pstk, MutexSet *pmset, uptr *ptag); template ALWAYS_INLINE WARN_UNUSED_RESULT bool TraceAcquire(ThreadState *thr, EventT **ev) { + // TraceSwitchPart accesses shadow_stack, but it's called infrequently, + // so we check it here proactively. 
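The SlotLocker/SlotUnlocker classes above implement a conditional RAII pattern keyed on thr->slot_locked, so nested scopes neither re-lock a slot that is already held nor unlock one they do not own. A standalone sketch of that pattern using std::mutex; the flag maintenance that the real SlotLock/SlotUnlock perform is assumed, and all names are illustrative:

#include <cassert>
#include <mutex>

struct ThreadStateModel {
  std::mutex slot_mtx;
  bool slot_locked = false;
};

class SlotLockerModel {
 public:
  explicit SlotLockerModel(ThreadStateModel *thr, bool recursive = false)
      : thr_(thr), locked_(recursive ? thr->slot_locked : false) {
    if (!locked_) {
      thr_->slot_mtx.lock();
      thr_->slot_locked = true;
    }
  }
  ~SlotLockerModel() {
    if (!locked_) {
      thr_->slot_locked = false;
      thr_->slot_mtx.unlock();
    }
  }

 private:
  ThreadStateModel *thr_;
  bool locked_;  // true if the slot was already locked on entry
};

int main() {
  ThreadStateModel thr;
  {
    SlotLockerModel outer(&thr);        // takes the lock
    SlotLockerModel inner(&thr, true);  // recursive: sees it is held, no-op
    assert(thr.slot_locked);
  }  // only the outer scope unlocks
  assert(!thr.slot_locked);
  return 0;
}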
+ DCHECK(thr->shadow_stack); Event *pos = reinterpret_cast(atomic_load_relaxed(&thr->trace_pos)); #if SANITIZER_DEBUG // TraceSwitch acquires these mutexes, @@ -746,20 +757,16 @@ void TraceMutexUnlock(ThreadState *thr, uptr addr); void TraceTime(ThreadState *thr); -} // namespace v3 +void TraceRestartFuncExit(ThreadState *thr); +void TraceRestartFuncEntry(ThreadState *thr, uptr pc); void GrowShadowStack(ThreadState *thr); ALWAYS_INLINE void FuncEntry(ThreadState *thr, uptr pc) { - DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc); - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc); - } - - // Shadow stack maintenance can be replaced with - // stack unwinding during trace switch (which presumably must be faster). + DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.sid(), (void *)pc); + if (UNLIKELY(!TryTraceFunc(thr, pc))) + return TraceRestartFuncEntry(thr, pc); DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack); #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -773,12 +780,9 @@ ALWAYS_INLINE void FuncExit(ThreadState *thr) { - DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid()); - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0); - } - + DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.sid()); + if (UNLIKELY(!TryTraceFunc(thr, 0))) + return TraceRestartFuncExit(thr); DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack); #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -790,7 +794,6 @@ extern void (*on_initialize)(void); extern int (*on_finalize)(int); #endif - } // namespace __tsan #endif // TSAN_RTL_H diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -57,110 +57,348 @@ bool OnFinalize(bool failed); void OnInitialize(); #else -#include SANITIZER_WEAK_CXX_DEFAULT_IMPL bool OnFinalize(bool failed) { -#if !SANITIZER_GO +# if !SANITIZER_GO if (on_finalize) return on_finalize(failed); -#endif +# endif return failed; } + SANITIZER_WEAK_CXX_DEFAULT_IMPL void OnInitialize() { -#if !SANITIZER_GO +# if !SANITIZER_GO if (on_initialize) on_initialize(); -#endif +# endif } #endif -static ThreadContextBase *CreateThreadContext(Tid tid) { - // Map thread trace when context is created. - char name[50]; - internal_snprintf(name, sizeof(name), "trace %u", tid); - MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name); - const uptr hdr = GetThreadTraceHeader(tid); - internal_snprintf(name, sizeof(name), "trace header %u", tid); - MapThreadTrace(hdr, sizeof(Trace), name); - new((void*)hdr) Trace(); - // We are going to use only a small part of the trace with the default - // value of history_size. However, the constructor writes to the whole trace. - // Release the unused part. 
- uptr hdr_end = hdr + sizeof(Trace); - hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts()); - hdr_end = RoundUp(hdr_end, GetPageSizeCached()); - if (hdr_end < hdr + sizeof(Trace)) { - ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace)); - uptr unused = hdr + sizeof(Trace) - hdr_end; - if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) { - Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx) \n", hdr_end, - unused); - CHECK("unable to mprotect" && 0); +static TracePart* TracePartAlloc(ThreadState* thr) { + TracePart* part = nullptr; + { + Lock lock(&ctx->slot_mtx); + uptr max_parts = Trace::kMinParts + flags()->history_size; + Trace* trace = &thr->tctx->trace; + if (trace->parts_allocated == max_parts || + ctx->trace_part_finished_excess) { + part = ctx->trace_part_recycle.PopFront(); + DPrintf("#%d: TracePartAlloc: part=%p\n", thr->tid, part); + if (part && part->trace) { + Trace* trace1 = part->trace; + Lock trace_lock(&trace1->mtx); + part->trace = nullptr; + TracePart* part1 = trace1->parts.PopFront(); + CHECK_EQ(part, part1); + if (trace1->parts_allocated > trace1->parts.Size()) { + ctx->trace_part_finished_excess += + trace1->parts_allocated - trace1->parts.Size(); + trace1->parts_allocated = trace1->parts.Size(); + } + } + } + if (trace->parts_allocated < max_parts) { + trace->parts_allocated++; + if (ctx->trace_part_finished_excess) + ctx->trace_part_finished_excess--; } + if (!part) + ctx->trace_part_total_allocated++; + else if (ctx->trace_part_recycle_finished) + ctx->trace_part_recycle_finished--; } - return New(tid); + if (!part) + part = new (MmapOrDie(sizeof(*part), "TracePart")) TracePart(); + return part; } +static void TracePartFree(TracePart* part) REQUIRES(ctx->slot_mtx) { + DCHECK(part->trace); + part->trace = nullptr; + ctx->trace_part_recycle.PushFront(part); +} + +void TraceResetForTesting() { + Lock lock(&ctx->slot_mtx); + while (auto* part = ctx->trace_part_recycle.PopFront()) { + if (auto trace = part->trace) + CHECK_EQ(trace->parts.PopFront(), part); + UnmapOrDie(part, sizeof(*part)); + } + ctx->trace_part_total_allocated = 0; + ctx->trace_part_recycle_finished = 0; + ctx->trace_part_finished_excess = 0; +} + +static void DoResetImpl(uptr epoch) { + ThreadRegistryLock lock0(&ctx->thread_registry); + Lock lock1(&ctx->slot_mtx); + CHECK_EQ(ctx->global_epoch, epoch); + ctx->global_epoch++; + CHECK(!ctx->resetting); + ctx->resetting = true; + for (u32 i = ctx->thread_registry.NumThreadsLocked(); i--;) { + ThreadContext* tctx = (ThreadContext*)ctx->thread_registry.GetThreadLocked( + static_cast(i)); + // Potentially we could purge all ThreadStatusDead threads from the + // registry. Since we reset all shadow, they can't race with anything + // anymore. However, their tid's can still be stored in some aux places + // (e.g. tid of thread that created something). + auto trace = &tctx->trace; + Lock lock(&trace->mtx); + bool attached = tctx->thr && tctx->thr->slot; + auto parts = &trace->parts; + bool local = false; + while (!parts->Empty()) { + auto part = parts->Front(); + local = local || part == trace->local_head; + if (local) + CHECK(!ctx->trace_part_recycle.Queued(part)); + else + ctx->trace_part_recycle.Remove(part); + if (attached && parts->Size() == 1) { + // The thread is running and this is the last/current part. + // Set the trace position to the end of the current part + // to force the thread to call SwitchTracePart and re-attach + // to a new slot and allocate a new trace part. 
+ // Note: the thread is concurrently modifying the position as well, + // so this is only best-effort. The thread can only modify position + // within this part, because switching parts is protected by + // slot/trace mutexes that we hold here. + atomic_store_relaxed( + &tctx->thr->trace_pos, + reinterpret_cast(&part->events[TracePart::kSize])); + break; + } + parts->Remove(part); + TracePartFree(part); + } + CHECK_LE(parts->Size(), 1); + trace->local_head = parts->Front(); + if (tctx->thr && !tctx->thr->slot) { + atomic_store_relaxed(&tctx->thr->trace_pos, 0); + tctx->thr->trace_prev_pc = 0; + } + if (trace->parts_allocated > trace->parts.Size()) { + ctx->trace_part_finished_excess += + trace->parts_allocated - trace->parts.Size(); + trace->parts_allocated = trace->parts.Size(); + } + } + while (ctx->slot_queue.PopFront()) { + } + for (auto& slot : ctx->slots) { + slot.SetEpoch(kEpochZero); + slot.journal.Reset(); + slot.thr = nullptr; + ctx->slot_queue.PushBack(&slot); + } + + DPrintf("Resetting shadow...\n"); + if (!MmapFixedSuperNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(), + "shadow")) { + Printf("failed to reset shadow memory\n"); + Die(); + } + DPrintf("Resetting meta shadow...\n"); + ctx->metamap.ResetClocks(); + ctx->resetting = false; +} + +// Clang does not understand locking all slots in the loop: +// error: expecting mutex 'slot.mtx' to be held at start of each loop +void DoReset(ThreadState* thr, uptr epoch) NO_THREAD_SAFETY_ANALYSIS { + { + Lock l(&ctx->multi_slot_mtx); + for (auto& slot : ctx->slots) { + slot.mtx.Lock(); + if (UNLIKELY(epoch == 0)) + epoch = ctx->global_epoch; + if (UNLIKELY(epoch != ctx->global_epoch)) { + // Epoch can't change once we've locked the first slot. + CHECK_EQ(slot.sid, 0); + slot.mtx.Unlock(); + return; + } + } + } + DPrintf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch); + DoResetImpl(epoch); + for (auto& slot : ctx->slots) slot.mtx.Unlock(); +} + +void FlushShadowMemory() { DoReset(nullptr, 0); } + +static TidSlot* FindSlotAndLock(ThreadState* thr) + ACQUIRE(thr->slot->mtx) NO_THREAD_SAFETY_ANALYSIS { + CHECK(!thr->slot); + TidSlot* slot = nullptr; + for (;;) { + uptr epoch; + { + Lock lock(&ctx->slot_mtx); + epoch = ctx->global_epoch; + if (slot) { + // This is an exhausted slot from the previous iteration. 
+ if (ctx->slot_queue.Queued(slot)) + ctx->slot_queue.Remove(slot); + thr->slot_locked = false; + slot->mtx.Unlock(); + } + for (;;) { + slot = ctx->slot_queue.PopFront(); + if (!slot) + break; + if (slot->epoch() != kEpochLast) { + ctx->slot_queue.PushBack(slot); + break; + } + } + } + if (!slot) { + DoReset(thr, epoch); + continue; + } + slot->mtx.Lock(); + CHECK(!thr->slot_locked); + thr->slot_locked = true; + if (slot->thr) { + DPrintf("#%d: preempting sid=%d tid=%d\n", thr->tid, (u32)slot->sid, + slot->thr->tid); + slot->SetEpoch(slot->thr->fast_state.epoch()); + slot->thr = nullptr; + } + if (slot->epoch() != kEpochLast) + return slot; + } +} + +void SlotAttachAndLock(ThreadState* thr) { + TidSlot* slot = FindSlotAndLock(thr); + DPrintf("#%d: SlotAttach: slot=%u\n", thr->tid, static_cast(slot->sid)); + CHECK(!slot->thr); + CHECK(!thr->slot); + slot->thr = thr; + thr->slot = slot; + Epoch epoch = EpochInc(slot->epoch()); + CHECK(!EpochOverflow(epoch)); + slot->SetEpoch(epoch); + thr->fast_state.SetSid(slot->sid); + thr->fast_state.SetEpoch(epoch); + if (thr->slot_epoch != ctx->global_epoch) { + thr->slot_epoch = ctx->global_epoch; + thr->clock.Reset(); #if !SANITIZER_GO -static const u32 kThreadQuarantineSize = 16; -#else -static const u32 kThreadQuarantineSize = 64; + thr->last_sleep_stack_id = kInvalidStackID; + thr->last_sleep_clock.Reset(); #endif + } + thr->clock.Set(slot->sid, epoch); + slot->journal.PushBack({thr->tid, epoch}); +} + +static void SlotDetachImpl(ThreadState* thr, bool exiting) { + TidSlot* slot = thr->slot; + thr->slot = nullptr; + if (thr != slot->thr) { + slot = nullptr; // we don't own the slot anymore + if (thr->slot_epoch != ctx->global_epoch) { + TracePart* part = nullptr; + auto* trace = &thr->tctx->trace; + { + Lock l(&trace->mtx); + auto* parts = &trace->parts; + // The trace can be completely empty in an unlikely event + // the thread is preempted right after it acquired the slot + // in ThreadStart and did not trace any events yet. 
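// --- Illustrative aside (not part of the patch): a minimal standalone model of
// the slot scheme implemented by FindSlotAndLock/SlotAttachAndLock above.
// Each slot hands out monotonically increasing epochs to the thread attached
// to it; an exhausted slot (epoch == last) drops out of rotation until a
// global reset. All names below (slot_model, Slot, Attach, ResetAll,
// kEpochBitsModel) are hypothetical and only sketch the idea.
#include <cstdint>
#include <deque>

namespace slot_model {
constexpr unsigned kEpochBitsModel = 14;  // the patch uses a 14-bit epoch
constexpr uint16_t kEpochLastModel = (1u << kEpochBitsModel) - 1;

struct Slot {
  uint16_t sid = 0;
  uint16_t epoch = 0;  // 0 means "freshly reset"
};

// Round-robin attach: pop slots until a non-exhausted one is found.
// Returns nullptr when every slot is exhausted, i.e. a global reset
// (the equivalent of DoReset above) is required.
inline Slot* Attach(std::deque<Slot*>& queue) {
  while (!queue.empty()) {
    Slot* s = queue.front();
    queue.pop_front();
    if (s->epoch == kEpochLastModel)
      continue;          // exhausted: stays out of the queue until reset
    ++s->epoch;          // the attaching thread gets a fresh epoch
    queue.push_back(s);  // keep the slot schedulable for other threads
    return s;
  }
  return nullptr;
}

inline void ResetAll(Slot* slots, unsigned n, std::deque<Slot*>& queue) {
  queue.clear();
  for (unsigned i = 0; i < n; i++) {
    slots[i].epoch = 0;
    queue.push_back(&slots[i]);
  }
}
}  // namespace slot_model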
+ CHECK_LE(parts->Size(), 1); + part = parts->PopFront(); + thr->tctx->trace.local_head = nullptr; + atomic_store_relaxed(&thr->trace_pos, 0); + thr->trace_prev_pc = 0; + } + if (part) { + Lock l(&ctx->slot_mtx); + TracePartFree(part); + } + } + return; + } + CHECK(exiting || thr->fast_state.epoch() == kEpochLast); + slot->SetEpoch(thr->fast_state.epoch()); + slot->thr = nullptr; +} + +void SlotDetach(ThreadState* thr) { + Lock lock(&thr->slot->mtx); + SlotDetachImpl(thr, true); +} + +void SlotLock(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS { + DCHECK(!thr->slot_locked); + TidSlot* slot = thr->slot; + slot->mtx.Lock(); + thr->slot_locked = true; + if (LIKELY(thr == slot->thr && thr->fast_state.epoch() != kEpochLast)) + return; + SlotDetachImpl(thr, false); + thr->slot_locked = false; + slot->mtx.Unlock(); + SlotAttachAndLock(thr); +} + +void SlotUnlock(ThreadState* thr) { + DCHECK(thr->slot_locked); + thr->slot_locked = false; + thr->slot->mtx.Unlock(); +} Context::Context() : initialized(), report_mtx(MutexTypeReport), nreported(), - thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize, - kMaxTidReuse), + thread_registry([](Tid tid) -> ThreadContextBase* { + return new (Alloc(sizeof(ThreadContext))) ThreadContext(tid); + }), racy_mtx(MutexTypeRacy), racy_stacks(), racy_addresses(), fired_suppressions_mtx(MutexTypeFired), - clock_alloc(LINKER_INITIALIZED, "clock allocator") { + clock_alloc(LINKER_INITIALIZED, "clock allocator"), + slot_mtx(MutexTypeSlots), + multi_slot_mtx(MutexTypeMultiSlot), + resetting() { fired_suppressions.reserve(8); + for (uptr i = 0; i < ARRAY_SIZE(slots); i++) { + TidSlot* slot = &slots[i]; + slot->sid = static_cast(i); + slot_queue.PushBack(slot); + } + global_epoch = 1; } +TidSlot::TidSlot() : mtx(MutexTypeSlot) {} + // The objects are allocated in TLS, so one may rely on zero-initialization. -ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch, - unsigned reuse_count, uptr stk_addr, uptr stk_size, - uptr tls_addr, uptr tls_size) - : fast_state(tid, epoch) - // Do not touch these, rely on zero initialization, - // they may be accessed before the ctor. - // , ignore_reads_and_writes() - // , ignore_interceptors() - , - clock(tid, reuse_count) -#if !SANITIZER_GO - , - jmp_bufs() -#endif - , - tid(tid), - unique_id(unique_id), - stk_addr(stk_addr), - stk_size(stk_size), - tls_addr(tls_addr), - tls_size(tls_size) -#if !SANITIZER_GO - , - last_sleep_clock(tid) -#endif -{ +ThreadState::ThreadState(Tid tid) + // Do not touch these, rely on zero initialization, + // they may be accessed before the ctor. + // ignore_reads_and_writes() + // ignore_interceptors() + : tid(tid) { CHECK_EQ(reinterpret_cast(this) % SANITIZER_CACHE_LINE_SIZE, 0); #if !SANITIZER_GO // C/C++ uses fixed size shadow stack. const int kInitStackSize = kShadowStackSize; - shadow_stack = static_cast( + shadow_stack = static_cast( MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack")); SetShadowRegionHugePageMode(reinterpret_cast(shadow_stack), kInitStackSize * sizeof(uptr)); #else // Go uses malloc-allocated shadow stack with dynamic size. 
const int kInitStackSize = 8; - shadow_stack = static_cast(Alloc(kInitStackSize * sizeof(uptr))); + shadow_stack = static_cast(Alloc(kInitStackSize * sizeof(uptr))); #endif shadow_stack_pos = shadow_stack; shadow_stack_end = shadow_stack + kInitStackSize; @@ -278,7 +516,8 @@ if (size == 0) return; DontNeedShadowFor(addr, size); ScopedGlobalProcessor sgp; - ctx->metamap.ResetRange(thr->proc(), addr, size); + SlotLocker locker(thr, true); + ctx->metamap.ResetRange(thr->proc(), addr, size, true); } #endif @@ -324,18 +563,6 @@ addr + size, meta_begin, meta_end); } -void MapThreadTrace(uptr addr, uptr size, const char *name) { - DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size); - CHECK_GE(addr, TraceMemBeg()); - CHECK_LE(addr + size, TraceMemEnd()); - CHECK_EQ(addr, addr & ~((64 << 10) - 1)); // windows wants 64K alignment - if (!MmapFixedSuperNoReserve(addr, size, name)) { - Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n", - addr, size); - Die(); - } -} - #if !SANITIZER_GO static void OnStackUnwind(const SignalContext &sig, const void *, BufferedStackTrace *stack) { @@ -354,8 +581,11 @@ // since we are going to die soon. ScopedIgnoreInterceptors ignore; #if !SANITIZER_GO - cur_thread()->ignore_sync++; - cur_thread()->ignore_reads_and_writes++; + ThreadState* thr = cur_thread(); + thr->nomalloc = false; + thr->ignore_sync++; + thr->ignore_reads_and_writes++; + atomic_store_relaxed(&thr->in_signal_handler, 0); #endif PrintCurrentStackSlow(StackTrace::GetCurrentPc()); } @@ -410,22 +640,22 @@ Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer); #endif - VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n", + VPrintf(1, "***** Running under ThreadSanitizer v3 (pid %d) *****\n", (int)internal_getpid()); // Initialize thread 0. - Tid tid = ThreadCreate(thr, 0, 0, true); + Tid tid = ThreadCreate(nullptr, 0, 0, true); CHECK_EQ(tid, kMainTid); ThreadStart(thr, tid, GetTid(), ThreadType::Regular); #if TSAN_CONTAINS_UBSAN __ubsan::InitAsPlugin(); #endif - ctx->initialized = true; #if !SANITIZER_GO Symbolizer::LateInitialize(); InitializeMemoryProfiler(); #endif + ctx->initialized = true; if (flags()->stop_on_start) { Printf("ThreadSanitizer is suspended at startup (pid %d)." @@ -451,7 +681,6 @@ #endif } - int Finalize(ThreadState *thr) { bool failed = false; @@ -459,12 +688,12 @@ DumpProcessMap(); if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1) - SleepForMillis(flags()->atexit_sleep_ms); + internal_usleep(u64(flags()->atexit_sleep_ms) * 1000); - // Wait for pending reports. - ctx->report_mtx.Lock(); - { ScopedErrorReportLock l; } - ctx->report_mtx.Unlock(); + { + // Wait for pending reports. + ScopedErrorReportLock lock; + } #if !SANITIZER_GO if (Verbosity()) AllocatorPrintStats(); @@ -491,8 +720,13 @@ #if !SANITIZER_GO void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { + // Detaching from the slot makes OnUserFree skip writing to the shadow. + // The slot will be locked so any attempts to use it will deadlock anyway. + SlotDetach(thr); + ctx->multi_slot_mtx.Lock(); + for (auto& slot : ctx->slots) slot.mtx.Lock(); ctx->thread_registry.Lock(); - ctx->report_mtx.Lock(); + ctx->slot_mtx.Lock(); ScopedErrorReportLock::Lock(); // Suppress all reports in the pthread_atfork callbacks. // Reports will deadlock on the report_mtx. @@ -505,30 +739,34 @@ // On OS X libSystem_atfork_prepare/parent/child callbacks are called // after/before our callbacks and they call free. 
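// --- Illustrative aside (not part of the patch): ForkBefore above and the
// ForkAfter helpers below follow the standard pthread_atfork discipline: take
// every runtime mutex in one fixed order in the prepare hook so the child
// never inherits a mutex held by another thread, then release them in the
// parent and child hooks. A minimal standalone sketch of that discipline
// (g_a, g_b and InstallForkHooks are hypothetical):
#include <pthread.h>

namespace fork_model {
static pthread_mutex_t g_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t g_b = PTHREAD_MUTEX_INITIALIZER;

static void Prepare() {      // runs in the parent right before fork()
  pthread_mutex_lock(&g_a);  // always a then b: one fixed order, no deadlock
  pthread_mutex_lock(&g_b);
}

static void Release() {      // runs after fork() in both parent and child
  pthread_mutex_unlock(&g_b);
  pthread_mutex_unlock(&g_a);
}

inline void InstallForkHooks() { pthread_atfork(Prepare, Release, Release); }
}  // namespace fork_model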
thr->ignore_interceptors++; + // Disables memory write in OnUserFree. + thr->ignore_reads_and_writes++; __tsan_test_only_on_fork(); } -void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { +static void ForkAfter(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS { thr->suppress_reports--; // Enabled in ForkBefore. thr->ignore_interceptors--; + thr->ignore_reads_and_writes--; ScopedErrorReportLock::Unlock(); - ctx->report_mtx.Unlock(); + ctx->slot_mtx.Unlock(); ctx->thread_registry.Unlock(); + for (auto& slot : ctx->slots) slot.mtx.Unlock(); + ctx->multi_slot_mtx.Unlock(); + SlotAttachAndLock(thr); + SlotUnlock(thr); } -void ForkChildAfter(ThreadState *thr, uptr pc, - bool start_thread) NO_THREAD_SAFETY_ANALYSIS { - thr->suppress_reports--; // Enabled in ForkBefore. - thr->ignore_interceptors--; - ScopedErrorReportLock::Unlock(); - ctx->report_mtx.Unlock(); - ctx->thread_registry.Unlock(); +void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); } - uptr nthread = 0; - ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */); - VPrintf(1, "ThreadSanitizer: forked new process with pid %d," - " parent had %d threads\n", (int)internal_getpid(), (int)nthread); +void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) { + ForkAfter(thr); + u32 nthread = ThreadCount(thr); + VPrintf(1, + "ThreadSanitizer: forked new process with pid %d," + " parent had %d threads\n", + (int)internal_getpid(), (int)nthread); if (nthread == 1) { if (start_thread) StartBackgroundThread(); @@ -538,6 +776,7 @@ // ignores for everything in the hope that we will exec soon. ctx->after_multithreaded_fork = true; thr->ignore_interceptors++; + thr->suppress_reports++; ThreadIgnoreBegin(thr, pc); ThreadIgnoreSyncBegin(thr, pc); } @@ -559,8 +798,10 @@ #endif StackID CurrentStackId(ThreadState *thr, uptr pc) { +#if !SANITIZER_GO if (!thr->is_inited) // May happen during bootstrap. return kInvalidStackID; +#endif if (pc != 0) { #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -578,53 +819,72 @@ return id; } -namespace v3 { - -NOINLINE -void TraceSwitchPart(ThreadState *thr) { +static bool TraceSkipGap(ThreadState* thr) { Trace *trace = &thr->tctx->trace; Event *pos = reinterpret_cast(atomic_load_relaxed(&thr->trace_pos)); DCHECK_EQ(reinterpret_cast(pos + 1) & TracePart::kAlignment, 0); auto *part = trace->parts.Back(); - DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos); - if (part) { - // We can get here when we still have space in the current trace part. - // The fast-path check in TraceAcquire has false positives in the middle of - // the part. Check if we are indeed at the end of the current part or not, - // and fill any gaps with NopEvent's. - Event *end = &part->events[TracePart::kSize]; - DCHECK_GE(pos, &part->events[0]); - DCHECK_LE(pos, end); - if (pos + 1 < end) { - if ((reinterpret_cast(pos) & TracePart::kAlignment) == - TracePart::kAlignment) - *pos++ = NopEvent; + DPrintf("#%d: TraceSwitchPart enter trace=%p parts=%p-%p pos=%p\n", thr->tid, + trace, trace->parts.Front(), part, pos); + if (!part) + return false; + // We can get here when we still have space in the current trace part. + // The fast-path check in TraceAcquire has false positives in the middle of + // the part. Check if we are indeed at the end of the current part or not, + // and fill any gaps with NopEvent's. 
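// --- Illustrative aside (not part of the patch): a standalone model of the
// gap-filling step performed by TraceSkipGap, whose body continues below. A
// trace part is a fixed-size array of events; if the position is not yet at
// the very end, the part can still be used, otherwise the remaining slots are
// padded with no-op events so later replay never reads garbage. Names
// (trace_model, SkipGap, kPartSizeModel, kNop) are hypothetical.
#include <cstddef>
#include <cstdint>

namespace trace_model {
constexpr size_t kPartSizeModel = 1024;
constexpr uint64_t kNop = 0;  // stands in for NopEvent

// Returns true if the part still has usable space (the caller keeps writing
// into it), false if it is genuinely full and a new part must be allocated.
inline bool SkipGap(uint64_t* events, uint64_t*& pos) {
  uint64_t* end = events + kPartSizeModel;
  if (pos + 1 < end) {
    *pos++ = kNop;  // pad a little (the real code also fixes up alignment)
    return true;
  }
  for (; pos < end; pos++) *pos = kNop;  // at the end: pad the whole tail
  return false;
}
}  // namespace trace_model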
+ Event* end = &part->events[TracePart::kSize]; + DCHECK_GE(pos, &part->events[0]); + DCHECK_LE(pos, end); + if (pos + 1 < end) { + if ((reinterpret_cast(pos) & TracePart::kAlignment) == + TracePart::kAlignment) *pos++ = NopEvent; - DCHECK_LE(pos + 2, end); - atomic_store_relaxed(&thr->trace_pos, reinterpret_cast(pos)); - // Ensure we setup trace so that the next TraceAcquire - // won't detect trace part end. - Event *ev; - CHECK(TraceAcquire(thr, &ev)); - return; - } - // We are indeed at the end. - for (; pos < end; pos++) *pos = NopEvent; + *pos++ = NopEvent; + DCHECK_LE(pos + 2, end); + atomic_store_relaxed(&thr->trace_pos, reinterpret_cast(pos)); + return true; } + // We are indeed at the end. + for (; pos < end; pos++) *pos = NopEvent; + return false; +} + +NOINLINE +void TraceSwitchPart(ThreadState* thr) { + if (TraceSkipGap(thr)) + return; #if !SANITIZER_GO if (ctx->after_multithreaded_fork) { // We just need to survive till exec. - CHECK(part); - atomic_store_relaxed(&thr->trace_pos, - reinterpret_cast(&part->events[0])); - return; + TracePart* part = thr->tctx->trace.parts.Back(); + if (part) { + atomic_store_relaxed(&thr->trace_pos, + reinterpret_cast(&part->events[0])); + return; + } } #endif - part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart(); + TraceSwitchPartImpl(thr); +} + +void TraceSwitchPartImpl(ThreadState* thr) { + SlotLocker locker(thr, true); + Trace* trace = &thr->tctx->trace; + TracePart* part = TracePartAlloc(thr); part->trace = trace; thr->trace_prev_pc = 0; + TracePart* recycle = nullptr; + // Keep roughly half of parts local to the thread + // (not queued into the recycle queue). + uptr local_parts = (Trace::kMinParts + flags()->history_size + 1) / 2; { Lock lock(&trace->mtx); + if (trace->parts.Empty()) + trace->local_head = part; + if (trace->parts.Size() >= local_parts) { + recycle = trace->local_head; + trace->local_head = trace->parts.Next(recycle); + } trace->parts.PushBack(part); atomic_store_relaxed(&thr->trace_pos, reinterpret_cast(&part->events[0])); @@ -632,60 +892,45 @@ // Make this part self-sufficient by restoring the current stack // and mutex set in the beginning of the trace. TraceTime(thr); - for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++) - CHECK(TryTraceFunc(thr, *pos)); + { + // Pathologically large stacks may not fit into the part. + // In these cases we log only fixed number of top frames. + const uptr kMaxFrames = 1000; + // Sanity check that kMaxFrames won't consume the whole part. + static_assert(kMaxFrames < TracePart::kSize / 2, "kMaxFrames is too big"); + uptr* pos = Max(&thr->shadow_stack[0], thr->shadow_stack_pos - kMaxFrames); + for (; pos < thr->shadow_stack_pos; pos++) { + if (TryTraceFunc(thr, *pos)) + continue; + CHECK(TraceSkipGap(thr)); + CHECK(TryTraceFunc(thr, *pos)); + } + } for (uptr i = 0; i < thr->mset.Size(); i++) { MutexSet::Desc d = thr->mset.Get(i); - TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0, - d.addr, d.stack_id); + for (uptr i = 0; i < d.count; i++) + TraceMutexLock(thr, d.write ? 
EventType::kLock : EventType::kRLock, 0, + d.addr, d.stack_id); } -} - -} // namespace v3 - -void TraceSwitch(ThreadState *thr) { -#if !SANITIZER_GO - if (ctx->after_multithreaded_fork) - return; -#endif - thr->nomalloc++; - Trace *thr_trace = ThreadTrace(thr->tid); - Lock l(&thr_trace->mtx); - unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts(); - TraceHeader *hdr = &thr_trace->headers[trace]; - hdr->epoch0 = thr->fast_state.epoch(); - ObtainCurrentStack(thr, 0, &hdr->stack0); - hdr->mset0 = thr->mset; - thr->nomalloc--; -} - -Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); } - -uptr TraceTopPC(ThreadState *thr) { - Event *events = (Event*)GetThreadTrace(thr->tid); - uptr pc = events[thr->fast_state.GetTracePos()]; - return pc; -} - -uptr TraceSize() { - return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1)); -} - -uptr TraceParts() { - return TraceSize() / kTracePartSize; + { + Lock lock(&ctx->slot_mtx); + ctx->slot_queue.Remove(thr->slot); + ctx->slot_queue.PushBack(thr->slot); + if (recycle) + ctx->trace_part_recycle.PushBack(recycle); + } + DPrintf("#%d: TraceSwitchPart exit parts=%p-%p pos=0x%zx\n", thr->tid, + trace->parts.Front(), trace->parts.Back(), + atomic_load_relaxed(&thr->trace_pos)); } #if !SANITIZER_GO -extern "C" void __tsan_trace_switch() { - TraceSwitch(cur_thread()); -} +extern "C" void __tsan_trace_switch() {} -extern "C" void __tsan_report_race() { - ReportRace(cur_thread()); -} +extern "C" void __tsan_report_race() {} #endif -void ThreadIgnoreBegin(ThreadState *thr, uptr pc) { +void ThreadIgnoreBegin(ThreadState* thr, uptr pc) { DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid); thr->ignore_reads_and_writes++; CHECK_GT(thr->ignore_reads_and_writes, 0); @@ -745,7 +990,6 @@ #else void build_consistency_release() {} #endif - } // namespace __tsan #if SANITIZER_CHECK_DEADLOCKS @@ -753,18 +997,27 @@ using namespace __tsan; MutexMeta mutex_meta[] = { {MutexInvalid, "Invalid", {}}, - {MutexThreadRegistry, "ThreadRegistry", {}}, - {MutexTypeTrace, "Trace", {MutexLeaf}}, - {MutexTypeReport, "Report", {MutexTypeSyncVar}}, - {MutexTypeSyncVar, "SyncVar", {}}, - {MutexTypeAnnotations, "Annotations", {}}, - {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}}, + {MutexThreadRegistry, + "ThreadRegistry", + {MutexTypeSlots, MutexTypeTrace, MutexTypeReport}}, + {MutexTypeReport, "Report", {MutexTypeTrace}}, + {MutexTypeSyncVar, "SyncVar", {MutexTypeReport, MutexTypeTrace}}, + {MutexTypeAnnotations, "Annotations", {MutexLeaf}}, + {MutexTypeAtExit, "AtExit", {}}, {MutexTypeFired, "Fired", {MutexLeaf}}, {MutexTypeRacy, "Racy", {MutexLeaf}}, - {MutexTypeGlobalProc, "GlobalProc", {}}, + {MutexTypeGlobalProc, "GlobalProc", {MutexTypeSlot, MutexTypeSlots}}, + {MutexTypeTrace, "Trace", {}}, + {MutexTypeSlot, + "Slot", + {MutexMulti, MutexTypeTrace, MutexTypeSyncVar, MutexThreadRegistry, + MutexTypeSlots}}, + {MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}}, + {MutexTypeMultiSlot, "MultiSlot", {MutexTypeSlot, MutexTypeSlots}}, {}, }; void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); } + } // namespace __sanitizer #endif diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp @@ -15,15 +15,13 @@ namespace __tsan { -namespace v3 { - -ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc, +ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, 
uptr pc, uptr addr, uptr size, AccessType typ) { DCHECK(size == 1 || size == 2 || size == 4 || size == 8); if (!kCollectHistory) return true; - EventAccess *ev; + EventAccess* ev; if (UNLIKELY(!TraceAcquire(thr, &ev))) return false; u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3; @@ -40,25 +38,27 @@ TraceRelease(thr, ev); return true; } - auto *evex = reinterpret_cast(ev); + auto* evex = reinterpret_cast(ev); evex->is_access = 0; evex->is_func = 0; evex->type = EventType::kAccessExt; evex->is_read = !!(typ & kAccessRead); evex->is_atomic = !!(typ & kAccessAtomic); evex->size_log = size_log; + // Note: this is important, see comment in EventAccessExt. + evex->_ = 0; evex->addr = CompressAddr(addr); evex->pc = pc; TraceRelease(thr, evex); return true; } -ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc, - uptr addr, uptr size, - AccessType typ) { +ALWAYS_INLINE +bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, + AccessType typ) { if (!kCollectHistory) return true; - EventAccessRange *ev; + EventAccessRange* ev; if (UNLIKELY(!TraceAcquire(thr, &ev))) return false; thr->trace_prev_pc = pc; @@ -75,7 +75,7 @@ return true; } -void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, +void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, AccessType typ) { if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ))) return; @@ -84,7 +84,7 @@ DCHECK(res); } -void TraceFunc(ThreadState *thr, uptr pc) { +void TraceFunc(ThreadState* thr, uptr pc) { if (LIKELY(TryTraceFunc(thr, pc))) return; TraceSwitchPart(thr); @@ -92,7 +92,17 @@ DCHECK(res); } -void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr, +NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) { + TraceSwitchPart(thr); + FuncEntry(thr, pc); +} + +NOINLINE void TraceRestartFuncExit(ThreadState* thr) { + TraceSwitchPart(thr); + FuncExit(thr); +} + +void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr, StackID stk) { DCHECK(type == EventType::kLock || type == EventType::kRLock); if (!kCollectHistory) @@ -109,7 +119,7 @@ TraceEvent(thr, ev); } -void TraceMutexUnlock(ThreadState *thr, uptr addr) { +void TraceMutexUnlock(ThreadState* thr, uptr addr) { if (!kCollectHistory) return; EventUnlock ev; @@ -121,396 +131,485 @@ TraceEvent(thr, ev); } -void TraceTime(ThreadState *thr) { +void TraceTime(ThreadState* thr) { if (!kCollectHistory) return; + FastState fast_state = thr->fast_state; EventTime ev; ev.is_access = 0; ev.is_func = 0; ev.type = EventType::kTime; - ev.sid = static_cast(thr->sid); - ev.epoch = static_cast(thr->epoch); + ev.sid = static_cast(fast_state.sid()); + ev.epoch = static_cast(fast_state.epoch()); ev._ = 0; TraceEvent(thr, ev); } -} // namespace v3 +ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) { + return static_cast( + atomic_load((atomic_uint32_t*)p, memory_order_relaxed)); +} -ALWAYS_INLINE -Shadow LoadShadow(u64 *p) { - u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed); - return Shadow(raw); +ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) { + atomic_store((atomic_uint32_t*)sp, static_cast(s), memory_order_relaxed); } -ALWAYS_INLINE -void StoreShadow(u64 *sp, u64 s) { - atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed); +NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + Shadow old, + AccessType typ) NO_THREAD_SAFETY_ANALYSIS { + // For the free shadow markers the first element (that 
contains kFreeSid) + // triggers the race, but the second element contains info about the freeing + // thread, take it. + if (old.sid() == kFreeSid) + old = Shadow(LoadShadow(&shadow_mem[1])); + // This prevents trapping on this address in future. + for (uptr i = 0; i < kShadowCnt; i++) + StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty); + // See the comment in MemoryRangeFreed as to why the slot is locked + // for free memory accesses. ReportRace must not be called with + // the slot locked because of the fork. But MemoryRangeFreed is not + // called during fork because fork sets ignore_reads_and_writes, + // so simply unlocking the slot should be fine. + if (typ & kAccessFree) + SlotUnlock(thr); + ReportRace(thr, shadow_mem, cur, Shadow(old), typ); + if (typ & kAccessFree) + SlotLock(thr); } +#if !TSAN_VECTORIZE ALWAYS_INLINE -void StoreIfNotYetStored(u64 *sp, u64 *s) { - StoreShadow(sp, *s); - *s = 0; +bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1, + AccessType typ) { + for (uptr i = 0; i < kShadowCnt; i++) { + auto old = LoadShadow(&s[i]); + if (!(typ & kAccessRead)) { + if (old == cur.raw()) + return true; + continue; + } + auto masked = static_cast(static_cast(old) | + static_cast(Shadow::kRodata)); + if (masked == cur.raw()) + return true; + if (!(typ & kAccessNoRodata) && !SANITIZER_GO) { + if (old == Shadow::kRodata) + return true; + } + } + return false; } -extern "C" void __tsan_report_race(); - ALWAYS_INLINE -void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) { - thr->racy_state[0] = cur.raw(); - thr->racy_state[1] = old.raw(); - thr->racy_shadow_addr = shadow_mem; -#if !SANITIZER_GO - HACKY_CALL(__tsan_report_race); -#else - ReportRace(thr); -#endif +bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + int unused0, int unused1, AccessType typ) { + bool stored = false; + for (uptr idx = 0; idx < kShadowCnt; idx++) { + RawShadow* sp = &shadow_mem[idx]; + Shadow old(LoadShadow(sp)); + if (LIKELY(old.raw() == Shadow::kEmpty)) { + if (!(typ & kAccessCheckOnly) && !stored) + StoreShadow(sp, cur.raw()); + return false; + } + if (LIKELY(!(cur.access() & old.access()))) + continue; + if (LIKELY(cur.sid() == old.sid())) { + if (!(typ & kAccessCheckOnly) && + LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) { + StoreShadow(sp, cur.raw()); + stored = true; + } + continue; + } + if (LIKELY(old.IsBothReadsOrAtomic(typ))) + continue; + if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch())) + continue; + DoReportRace(thr, shadow_mem, cur, old, typ); + return true; + } + // We did not find any races and had already stored + // the current access info, so we are done. + if (LIKELY(stored)) + return false; + // Choose a random candidate slot and replace it. + uptr index = + atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt; + StoreShadow(&shadow_mem[index], cur.raw()); + return false; } -static inline bool HappensBefore(Shadow old, ThreadState *thr) { - return thr->clock.get(old.TidWithIgnore()) >= old.epoch(); -} +# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0 -ALWAYS_INLINE -void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog, - bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem, - Shadow cur) { - // This potentially can live in an MMX/SSE scratch register. 
- // The required intrinsics are: - // __m128i _mm_move_epi64(__m128i*); - // _mm_storel_epi64(u64*, __m128i); - u64 store_word = cur.raw(); - bool stored = false; +#else /* !TSAN_VECTORIZE */ - // scan all the shadow values and dispatch to 4 categories: - // same, replace, candidate and race (see comments below). - // we consider only 3 cases regarding access sizes: - // equal, intersect and not intersect. initially I considered - // larger and smaller as well, it allowed to replace some - // 'candidates' with 'same' or 'replace', but I think - // it's just not worth it (performance- and complexity-wise). - - Shadow old(0); - - // It release mode we manually unroll the loop, - // because empirically gcc generates better code this way. - // However, we can't afford unrolling in debug mode, because the function - // consumes almost 4K of stack. Gtest gives only 4K of stack to death test - // threads, which is not enough for the unrolled loop. -#if SANITIZER_DEBUG - for (int idx = 0; idx < 4; idx++) { -# include "tsan_update_shadow_word.inc" - } -#else - int idx = 0; -# include "tsan_update_shadow_word.inc" - idx = 1; - if (stored) { -# include "tsan_update_shadow_word.inc" - } else { -# include "tsan_update_shadow_word.inc" - } - idx = 2; - if (stored) { -# include "tsan_update_shadow_word.inc" - } else { -# include "tsan_update_shadow_word.inc" +ALWAYS_INLINE +bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow, + m128 access, AccessType typ) { + // Note: we could check if there is a larger access of the same type, + // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes) + // and now do smaller reads/writes, these can also be considered as "same + // access". However, it will make the check more expensive, so it's unclear + // if it's worth it. But this would conserve trace space, so it's useful + // besides potential speed up. + if (!(typ & kAccessRead)) { + const m128 same = _mm_cmpeq_epi32(shadow, access); + return _mm_movemask_epi8(same); } - idx = 3; - if (stored) { -# include "tsan_update_shadow_word.inc" - } else { -# include "tsan_update_shadow_word.inc" + // For reads we need to reset read bit in the shadow, + // because we need to match read with both reads and writes. + // Shadow::kRodata has only read bit set, so it does what we want. + // We also abuse it for rodata check to save few cycles + // since we already loaded Shadow::kRodata into a register. + // Reads from rodata can't race. + // Measurements show that they can be 10-20% of all memory accesses. + // Shadow::kRodata has epoch 0 which cannot appear in shadow normally + // (thread epochs start from 1). So the same read bit mask + // serves as rodata indicator. + const m128 read_mask = _mm_set1_epi32(static_cast(Shadow::kRodata)); + const m128 masked_shadow = _mm_or_si128(shadow, read_mask); + m128 same = _mm_cmpeq_epi32(masked_shadow, access); + // Range memory accesses check Shadow::kRodata before calling this, + // Shadow::kRodatas is not possible for free memory access + // and Go does not use Shadow::kRodata. 
+ if (!(typ & kAccessNoRodata) && !SANITIZER_GO) { + const m128 ro = _mm_cmpeq_epi32(shadow, read_mask); + same = _mm_or_si128(ro, same); } -#endif + return _mm_movemask_epi8(same); +} - // we did not find any races and had already stored - // the current access info, so we are done - if (LIKELY(stored)) - return; - // choose a random candidate slot and replace it - StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word); - return; -RACE: - HandleRace(thr, shadow_mem, cur, old); - return; -} - -void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, - AccessType typ) { - DCHECK(!(typ & kAccessAtomic)); - const bool kAccessIsWrite = !(typ & kAccessRead); - const bool kIsAtomic = false; - while (size) { - int size1 = 1; - int kAccessSizeLog = kSizeLog1; - if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) { - size1 = 8; - kAccessSizeLog = kSizeLog8; - } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) { - size1 = 4; - kAccessSizeLog = kSizeLog4; - } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) { - size1 = 2; - kAccessSizeLog = kSizeLog2; - } - MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic); - addr += size1; - size -= size1; +NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + u32 race_mask, m128 shadow, AccessType typ) { + // race_mask points which of the shadow elements raced with the current + // access. Extract that element. + CHECK_NE(race_mask, 0); + u32 old; + // Note: _mm_extract_epi32 index must be a constant value. + switch (__builtin_ffs(race_mask) / 4) { + case 0: + old = _mm_extract_epi32(shadow, 0); + break; + case 1: + old = _mm_extract_epi32(shadow, 1); + break; + case 2: + old = _mm_extract_epi32(shadow, 2); + break; + case 3: + old = _mm_extract_epi32(shadow, 3); + break; } + Shadow prev(static_cast(old)); + // For the free shadow markers the first element (that contains kFreeSid) + // triggers the race, but the second element contains info about the freeing + // thread, take it. + if (prev.sid() == kFreeSid) + prev = Shadow(static_cast(_mm_extract_epi32(shadow, 1))); + DoReportRace(thr, shadow_mem, cur, prev, typ); } ALWAYS_INLINE -bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) { - Shadow cur(a); - for (uptr i = 0; i < kShadowCnt; i++) { - Shadow old(LoadShadow(&s[i])); - if (Shadow::Addr0AndSizeAreEqual(cur, old) && - old.TidWithIgnore() == cur.TidWithIgnore() && - old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() && - old.IsRead() <= cur.IsRead()) - return true; +bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + m128 shadow, m128 access, AccessType typ) { + // Note: empty/zero slots don't intersect with any access. 
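// --- Illustrative aside (not part of the patch): the mask constants used just
// below in the vectorized CheckRaces (0x000000ff, 0x0000ff00, 0x3fff0000,
// 0xc0000000) imply the following layout of the new 32-bit shadow word:
// an 8-bit per-byte access mask, an 8-bit sid, a 14-bit epoch, and the
// read/atomic flags in the two top bits (exact bit assignment inferred).
// The packing helpers below are a hypothetical standalone model, not the
// real Shadow class.
#include <cstdint>

namespace shadow_model {
struct Fields {
  uint8_t access;   // bits 0..7: one bit per byte of the 8-byte shadow cell
  uint8_t sid;      // bits 8..15: slot id of the accessing thread
  uint16_t epoch;   // bits 16..29: 14-bit epoch within that slot
  bool is_read;     // bit 30 (inferred)
  bool is_atomic;   // bit 31 (inferred)
};

inline uint32_t Pack(const Fields& f) {
  return uint32_t(f.access) | (uint32_t(f.sid) << 8) |
         ((uint32_t(f.epoch) & 0x3fffu) << 16) | (uint32_t(f.is_read) << 30) |
         (uint32_t(f.is_atomic) << 31);
}

inline Fields Unpack(uint32_t raw) {
  return Fields{uint8_t(raw & 0xffu), uint8_t((raw >> 8) & 0xffu),
                uint16_t((raw >> 16) & 0x3fffu), ((raw >> 30) & 1u) != 0,
                ((raw >> 31) & 1u) != 0};
}
}  // namespace shadow_model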
+ const m128 zero = _mm_setzero_si128(); + const m128 mask_access = _mm_set1_epi32(0x000000ff); + const m128 mask_sid = _mm_set1_epi32(0x0000ff00); + const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000); + const m128 access_and = _mm_and_si128(access, shadow); + const m128 access_xor = _mm_xor_si128(access, shadow); + const m128 intersect = _mm_and_si128(access_and, mask_access); + const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero); + const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid); + const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero); + const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic); + const m128 no_race = + _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic); + const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero)); + if (UNLIKELY(race_mask)) + goto SHARED; + +STORE : { + if (typ & kAccessCheckOnly) + return false; + // We could also replace different sid's if access is the same, + // rw weaker and happens before. However, just checking access below + // is not enough because we also need to check that !both_read_or_atomic + // (reads from different sids can be concurrent). + // Theoretically we could replace smaller accesses with larger accesses, + // but it's unclear if it's worth doing. + const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff); + const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid); + const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero); + const m128 access_read_atomic = + _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30); + const m128 rw_weaker = + _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow); + const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker); + const int rewrite_mask = _mm_movemask_epi8(rewrite); + int index = __builtin_ffs(rewrite_mask); + if (UNLIKELY(index == 0)) { + const m128 empty = _mm_cmpeq_epi32(shadow, zero); + const int empty_mask = _mm_movemask_epi8(empty); + index = __builtin_ffs(empty_mask); + if (UNLIKELY(index == 0)) + index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16; } + StoreShadow(&shadow_mem[index / 4], cur.raw()); + // We could zero other slots determined by rewrite_mask. + // That would help other threads to evict better slots, + // but it's unclear if it's worth it. return false; } -#if TSAN_VECTORIZE -# define SHUF(v0, v1, i0, i1, i2, i3) \ - _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \ - _mm_castsi128_ps(v1), \ - (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64)) -ALWAYS_INLINE -bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) { - // This is an optimized version of ContainsSameAccessSlow. 
- // load current access into access[0:63] - const m128 access = _mm_cvtsi64_si128(a); - // duplicate high part of access in addr0: - // addr0[0:31] = access[32:63] - // addr0[32:63] = access[32:63] - // addr0[64:95] = access[32:63] - // addr0[96:127] = access[32:63] - const m128 addr0 = SHUF(access, access, 1, 1, 1, 1); - // load 4 shadow slots - const m128 shadow0 = _mm_load_si128((__m128i *)s); - const m128 shadow1 = _mm_load_si128((__m128i *)s + 1); - // load high parts of 4 shadow slots into addr_vect: - // addr_vect[0:31] = shadow0[32:63] - // addr_vect[32:63] = shadow0[96:127] - // addr_vect[64:95] = shadow1[32:63] - // addr_vect[96:127] = shadow1[96:127] - m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3); - if (!is_write) { - // set IsRead bit in addr_vect - const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15); - const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0); - addr_vect = _mm_or_si128(addr_vect, rw_mask); - } - // addr0 == addr_vect? - const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect); - // epoch1[0:63] = sync_epoch - const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch); - // epoch[0:31] = sync_epoch[0:31] - // epoch[32:63] = sync_epoch[0:31] - // epoch[64:95] = sync_epoch[0:31] - // epoch[96:127] = sync_epoch[0:31] - const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0); - // load low parts of shadow cell epochs into epoch_vect: - // epoch_vect[0:31] = shadow0[0:31] - // epoch_vect[32:63] = shadow0[64:95] - // epoch_vect[64:95] = shadow1[0:31] - // epoch_vect[96:127] = shadow1[64:95] - const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2); - // epoch_vect >= sync_epoch? - const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch); - // addr_res & epoch_res - const m128 res = _mm_and_si128(addr_res, epoch_res); - // mask[0] = res[7] - // mask[1] = res[15] - // ... - // mask[15] = res[127] - const int mask = _mm_movemask_epi8(res); - return mask != 0; +SHARED: + m128 thread_epochs = _mm_set1_epi32(0x7fffffff); + // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32 + // indexes must be constants. +# define LOAD_EPOCH(idx) \ + if (LIKELY(race_mask & (1 << (idx * 4)))) { \ + u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1); \ + u16 epoch = static_cast(thr->clock.Get(static_cast(sid))); \ + thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \ + } + LOAD_EPOCH(0); + LOAD_EPOCH(1); + LOAD_EPOCH(2); + LOAD_EPOCH(3); +# undef LOAD_EPOCH + const m128 mask_epoch = _mm_set1_epi32(0x3fff0000); + const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch); + const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs); + const int concurrent_mask = _mm_movemask_epi8(concurrent); + if (LIKELY(concurrent_mask == 0)) + goto STORE; + + DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ); + return true; } -#endif -ALWAYS_INLINE -bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) { -#if TSAN_VECTORIZE - bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write); - // NOTE: this check can fail if the shadow is concurrently mutated - // by other threads. But it still can be useful if you modify - // ContainsSameAccessFast and want to ensure that it's not completely broken. 
- // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write)); - return res; -#else - return ContainsSameAccessSlow(s, a, sync_epoch, is_write); +# define LOAD_CURRENT_SHADOW(cur, shadow_mem) \ + const m128 access = _mm_set1_epi32(static_cast((cur).raw())); \ + const m128 shadow = _mm_load_si128(reinterpret_cast(shadow_mem)) #endif -} -ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, - int kAccessSizeLog, bool kAccessIsWrite, - bool kIsAtomic) { - RawShadow *shadow_mem = MemToShadow(addr); - DPrintf2( - "#%d: MemoryAccess: @%p %p size=%d" - " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n", - (int)thr->fast_state.tid(), (void *)pc, (void *)addr, - (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem, - (uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2], - (uptr)shadow_mem[3]); -#if SANITIZER_DEBUG - if (!IsAppMem(addr)) { - Printf("Access to non app mem %zx\n", addr); - DCHECK(IsAppMem(addr)); - } - if (!IsShadowMem(shadow_mem)) { - Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr); - DCHECK(IsShadowMem(shadow_mem)); +char* DumpShadow(char* buf, RawShadow raw) { + if (raw == Shadow::kEmpty) { + internal_snprintf(buf, 64, "0"); + return buf; } -#endif + Shadow s(raw); + AccessType typ; + s.GetAccess(nullptr, nullptr, &typ); + internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}", + static_cast(s.sid()), static_cast(s.epoch()), + s.access(), static_cast(typ)); + return buf; +} - if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) { - // Access to .rodata section, no races here. - // Measurements show that it can be 10-20% of all memory accesses. - return; - } +// TryTrace* and TraceRestart* functions allow to turn memory access and func +// entry/exit callbacks into leaf functions with all associated performance +// benefits. These hottest callbacks do only 2 slow path calls: report a race +// and trace part switching. Race reporting is easy to turn into a tail call, we +// just always return from the runtime after reporting a race. But trace part +// switching is harder because it needs to be in the middle of callbacks. To +// turn it into a tail call we immidiately return after TraceRestart* functions, +// but TraceRestart* functions themselves recurse into the callback after +// switching trace part. As the result the hottest callbacks contain only tail +// calls, which effectively makes them leaf functions (can use all registers, +// no frame setup, etc). 
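// --- Illustrative aside (not part of the patch): a minimal standalone model of
// the restart pattern described in the comment above. The hot callback does a
// single fast-path check and otherwise tail-calls a NOINLINE restart helper;
// the helper does the slow work and then re-enters the callback, so the hot
// function itself stays leaf-like. All names below are hypothetical.
#include <cstdint>

namespace restart_model {
struct Ctx {
  uint64_t* pos;
  uint64_t* end;
};

void HotRecord(Ctx* ctx, uint64_t ev);  // forward declaration

// Slow path, never inlined, so it does not drag a frame into the hot path.
__attribute__((noinline)) void RestartRecord(Ctx* ctx, uint64_t ev) {
  // Pretend to switch to a fresh buffer, then redo the original operation.
  static uint64_t fresh[1024];
  ctx->pos = fresh;
  ctx->end = fresh + 1024;
  HotRecord(ctx, ev);
}

// Hot path: one bounds check plus a store, or a tail call into the restart
// helper. Because every slow transfer is a tail call, the compiler can keep
// this function free of push/pop and frame setup.
inline void HotRecord(Ctx* ctx, uint64_t ev) {
  if (__builtin_expect(ctx->pos == ctx->end, 0))
    return RestartRecord(ctx, ev);
  *ctx->pos++ = ev;
}
}  // namespace restart_model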
+NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr, + uptr size, AccessType typ) { + TraceSwitchPart(thr); + MemoryAccess(thr, pc, addr, size, typ); +} - FastState fast_state = thr->fast_state; - if (UNLIKELY(fast_state.GetIgnoreBit())) { - return; - } +ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr, + uptr size, AccessType typ) { + RawShadow* shadow_mem = MemToShadow(addr); + UNUSED char memBuf[4][64]; + DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid, + static_cast(thr->fast_state.sid()), + static_cast(thr->fast_state.epoch()), (void*)addr, size, + static_cast(typ), DumpShadow(memBuf[0], shadow_mem[0]), + DumpShadow(memBuf[1], shadow_mem[1]), + DumpShadow(memBuf[2], shadow_mem[2]), + DumpShadow(memBuf[3], shadow_mem[3])); - Shadow cur(fast_state); - cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog); - cur.SetWrite(kAccessIsWrite); - cur.SetAtomic(kIsAtomic); + FastState fast_state = thr->fast_state; + Shadow cur(fast_state, addr, size, typ); - if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch, - kAccessIsWrite))) { + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) return; - } - - if (kCollectHistory) { - fast_state.IncrementEpoch(); - thr->fast_state = fast_state; - TraceAddEvent(thr, fast_state, EventTypeMop, pc); - cur.IncrementEpoch(); - } + if (UNLIKELY(fast_state.GetIgnoreBit())) + return; + if (!TryTraceMemoryAccess(thr, pc, addr, size, typ)) + return TraceRestartMemoryAccess(thr, pc, addr, size, typ); + CheckRaces(thr, shadow_mem, cur, shadow, access, typ); +} - MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic, - shadow_mem, cur); +NOINLINE +void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr, + uptr size, AccessType typ) { + TraceSwitchPart(thr); + UnalignedMemoryAccess(thr, pc, addr, size, typ); } -// Called by MemoryAccessRange in tsan_rtl_thread.cpp -ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr, - int kAccessSizeLog, - bool kAccessIsWrite, bool kIsAtomic, - u64 *shadow_mem, Shadow cur) { - if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch, - kAccessIsWrite))) { +ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc, + uptr addr, uptr size, + AccessType typ) { + DCHECK_LE(size, 8); + FastState fast_state = thr->fast_state; + if (UNLIKELY(fast_state.GetIgnoreBit())) return; + RawShadow* shadow_mem = MemToShadow(addr); + bool traced = false; + uptr size1 = Min(size, RoundUp(addr + 1, kShadowCell) - addr); + { + Shadow cur(fast_state, addr, size1, typ); + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + goto SECOND; + if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ); + traced = true; + if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ))) + return; } +SECOND: + uptr size2 = size - size1; + if (LIKELY(size2 == 0)) + return; + shadow_mem += kShadowCnt; + Shadow cur(fast_state, 0, size2, typ); + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + return; + if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ); + CheckRaces(thr, shadow_mem, cur, shadow, access, typ); +} - MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, 
kIsAtomic, - shadow_mem, cur); +void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) { + DCHECK_LE(p, end); + DCHECK(IsShadowMem(p)); + DCHECK(IsShadowMem(end)); + UNUSED const uptr kAlign = kShadowCnt * kShadowSize; + DCHECK_EQ(reinterpret_cast(p) % kAlign, 0); + DCHECK_EQ(reinterpret_cast(end) % kAlign, 0); +#if !TSAN_VECTORIZE + for (; p < end; p += kShadowCnt) { + p[0] = v; + for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty; + } +#else + m128 vv = _mm_setr_epi32( + static_cast(v), static_cast(Shadow::kEmpty), + static_cast(Shadow::kEmpty), static_cast(Shadow::kEmpty)); + m128* vp = reinterpret_cast(p); + m128* vend = reinterpret_cast(end); + for (; vp < vend; vp++) _mm_store_si128(vp, vv); +#endif } -static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size, - u64 val) { - (void)thr; - (void)pc; +static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) { if (size == 0) return; - // FIXME: fix me. - uptr offset = addr % kShadowCell; - if (offset) { - offset = kShadowCell - offset; - if (size <= offset) - return; - addr += offset; - size -= offset; - } - DCHECK_EQ(addr % 8, 0); + DCHECK_EQ(addr % kShadowCell, 0); + DCHECK_EQ(size % kShadowCell, 0); // If a user passes some insane arguments (memset(0)), // let it just crash as usual. if (!IsAppMem(addr) || !IsAppMem(addr + size - 1)) return; + RawShadow* begin = MemToShadow(addr); + RawShadow* end = begin + size / kShadowCell * kShadowCnt; // Don't want to touch lots of shadow memory. // If a program maps 10MB stack, there is no need reset the whole range. - size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1); // UnmapOrDie/MmapFixedNoReserve does not work on Windows. - if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) { - RawShadow *p = MemToShadow(addr); - CHECK(IsShadowMem(p)); - CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1)); - // FIXME: may overwrite a part outside the region - for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) { - p[i++] = val; - for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0; - } - } else { - // The region is big, reset only beginning and end. - const uptr kPageSize = GetPageSizeCached(); - RawShadow *begin = MemToShadow(addr); - RawShadow *end = begin + size / kShadowCell * kShadowCnt; - RawShadow *p = begin; - // Set at least first kPageSize/2 to page boundary. - while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) { - *p++ = val; - for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0; - } - // Reset middle part. - RawShadow *p1 = p; - p = RoundDown(end, kPageSize); - if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1)) + if (SANITIZER_WINDOWS || + size <= common_flags()->clear_shadow_mmap_threshold) { + ShadowSet(begin, end, val); + return; + } + // The region is big, reset only beginning and end. + const uptr kPageSize = GetPageSizeCached(); + // Set at least first kPageSize/2 to page boundary. + RawShadow* mid1 = + Min(end, reinterpret_cast(RoundUp( + reinterpret_cast(begin) + kPageSize / 2, kPageSize))); + ShadowSet(begin, mid1, val); + // Reset middle part. + RawShadow* mid2 = RoundDown(end, kPageSize); + if (mid2 > mid1) { + if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1)) Die(); - // Set the ending. - while (p < end) { - *p++ = val; - for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0; - } } + // Set the ending. 
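// --- Illustrative aside (not part of the patch; the final edge write of
// MemoryRangeSet continues right below): for large ranges the code only
// writes the unaligned edges of the shadow and re-maps the page-aligned
// middle, which hands those pages back to the OS instead of dirtying them.
// A standalone sketch of that split, using std::memset in place of the
// sanitizer mmap wrappers (clear_model, ClearRange and RemapFn are
// hypothetical):
#include <cstdint>
#include <cstring>

namespace clear_model {
inline uintptr_t RoundUpTo(uintptr_t x, uintptr_t a) { return (x + a - 1) & ~(a - 1); }
inline uintptr_t RoundDownTo(uintptr_t x, uintptr_t a) { return x & ~(a - 1); }

// remap(begin, end) is expected to zero the pages cheaply, e.g. by mapping
// fresh anonymous memory over them; page must be a power of two.
template <typename RemapFn>
void ClearRange(char* p, size_t size, size_t page, RemapFn remap) {
  uintptr_t begin = reinterpret_cast<uintptr_t>(p);
  uintptr_t end = begin + size;
  uintptr_t mid1 = RoundUpTo(begin, page);
  uintptr_t mid2 = RoundDownTo(end, page);
  if (mid2 <= mid1) {               // small range: just clear it directly
    std::memset(p, 0, size);
    return;
  }
  std::memset(p, 0, mid1 - begin);  // leading partial page
  remap(mid1, mid2);                // whole pages in the middle
  std::memset(reinterpret_cast<char*>(mid2), 0, end - mid2);  // trailing part
}
}  // namespace clear_model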
+ ShadowSet(mid2, end, val); } -void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) { - MemoryRangeSet(thr, pc, addr, size, 0); +void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) { + uptr addr1 = RoundDown(addr, kShadowCell); + uptr size1 = RoundUp(size + addr - addr1, kShadowCell); + MemoryRangeSet(addr1, size1, Shadow::kEmpty); } -void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) { - // Processing more than 1k (4k of shadow) is expensive, +void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) { + // Callers must lock the slot to ensure synchronization with the reset. + // The problem with "freed" memory is that it's not "monotonic" + // with respect to bug detection: freed memory is bad to access, + // but then if the heap block is reallocated later, it's good to access. + // As the result a garbage "freed" shadow can lead to a false positive + // if it happens to match a real free in the thread trace, + // but the heap block was reallocated before the current memory access, + // so it's still good to access. It's not the case with data races. + DCHECK(thr->slot_locked); + DCHECK_EQ(addr % kShadowCell, 0); + size = RoundUp(size, kShadowCell); + // Processing more than 1k (2k of shadow) is expensive, // can cause excessive memory consumption (user does not necessary touch // the whole range) and most likely unnecessary. - if (size > 1024) - size = 1024; - CHECK_EQ(thr->is_freeing, false); - thr->is_freeing = true; - MemoryAccessRange(thr, pc, addr, size, true); - thr->is_freeing = false; - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc); + size = Min(size, 1024); + const AccessType typ = + kAccessWrite | kAccessFree | kAccessCheckOnly | kAccessNoRodata; + TraceMemoryAccessRange(thr, pc, addr, size, typ); + RawShadow* shadow_mem = MemToShadow(addr); + Shadow cur(thr->fast_state, 0, kShadowCell, typ); +#if TSAN_VECTORIZE + const m128 access = _mm_set1_epi32(static_cast(cur.raw())); + const m128 freed = _mm_setr_epi32( + static_cast(Shadow::FreedMarker()), + static_cast(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0); + for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) { + const m128 shadow = _mm_load_si128((m128*)shadow_mem); + if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ))) + return; + _mm_store_si128((m128*)shadow_mem, freed); } - Shadow s(thr->fast_state); - s.ClearIgnoreBit(); - s.MarkAsFreed(); - s.SetWrite(true); - s.SetAddr0AndSizeLog(0, 3); - MemoryRangeSet(thr, pc, addr, size, s.raw()); -} - -void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) { - if (kCollectHistory) { - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc); +#else + for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) { + if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ))) + return; + StoreShadow(&shadow_mem[0], Shadow::FreedMarker()); + StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch())); + StoreShadow(&shadow_mem[2], Shadow::kEmpty); + StoreShadow(&shadow_mem[3], Shadow::kEmpty); } - Shadow s(thr->fast_state); - s.ClearIgnoreBit(); - s.SetWrite(true); - s.SetAddr0AndSizeLog(0, 3); - MemoryRangeSet(thr, pc, addr, size, s.raw()); +#endif +} + +void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) { + DCHECK_EQ(addr % kShadowCell, 0); + size = RoundUp(size, kShadowCell); + TraceMemoryAccessRange(thr, pc, addr, size, 
kAccessWrite); + Shadow cur(thr->fast_state, 0, 8, kAccessWrite); + MemoryRangeSet(addr, size, cur.raw()); } -void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, +void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) { if (thr->ignore_reads_and_writes == 0) MemoryRangeImitateWrite(thr, pc, addr, size); @@ -518,14 +617,29 @@ MemoryResetRange(thr, pc, addr, size); } -void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, - bool is_write) { - if (size == 0) - return; +ALWAYS_INLINE +bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, + AccessType typ) { + LOAD_CURRENT_SHADOW(cur, shadow_mem); + if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + return false; + return CheckRaces(thr, shadow_mem, cur, shadow, access, typ); +} + +template +NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, + uptr size) { + TraceSwitchPart(thr); + MemoryAccessRangeT(thr, pc, addr, size); +} - RawShadow *shadow_mem = MemToShadow(addr); - DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid, - (void *)pc, (void *)addr, (int)size, is_write); +template +void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) { + const AccessType typ = + (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata; + RawShadow* shadow_mem = MemToShadow(addr); + DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid, + (void*)pc, (void*)addr, (int)size, is_read); #if SANITIZER_DEBUG if (!IsAppMem(addr)) { @@ -537,65 +651,57 @@ DCHECK(IsAppMem(addr + size - 1)); } if (!IsShadowMem(shadow_mem)) { - Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr); + Printf("Bad shadow addr %p (%zx)\n", static_cast(shadow_mem), addr); DCHECK(IsShadowMem(shadow_mem)); } - if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) { - Printf("Bad shadow addr %p (%zx)\n", shadow_mem + size * kShadowCnt / 8 - 1, + if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) { + Printf("Bad shadow addr %p (%zx)\n", + static_cast(shadow_mem + size * kShadowCnt - 1), addr + size - 1); - DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)); + DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1)); } #endif - if (*shadow_mem == kShadowRodata) { - DCHECK(!is_write); - // Access to .rodata section, no races here. - // Measurements show that it can be 10-20% of all memory accesses. + // Access to .rodata section, no races here. + // Measurements show that it can be 10-20% of all memory accesses. + if (is_read && *shadow_mem == Shadow::kRodata) return; - } FastState fast_state = thr->fast_state; - if (fast_state.GetIgnoreBit()) + if (UNLIKELY(fast_state.GetIgnoreBit())) return; - fast_state.IncrementEpoch(); - thr->fast_state = fast_state; - TraceAddEvent(thr, fast_state, EventTypeMop, pc); + if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) + return RestartMemoryAccessRange(thr, pc, addr, size); - bool unaligned = (addr % kShadowCell) != 0; - - // Handle unaligned beginning, if any. - for (; addr % kShadowCell && size; addr++, size--) { - int const kAccessSizeLog = 0; - Shadow cur(fast_state); - cur.SetWrite(is_write); - cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog); - MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, - cur); - } - if (unaligned) + if (UNLIKELY(addr % kShadowCell)) { + // Handle unaligned beginning, if any. 
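// --- Illustrative aside (not part of the patch; the unaligned-head handling
// continues right below): MemoryAccessRangeT walks the range as an optional
// unaligned head, whole 8-byte shadow cells in the middle, and an optional
// tail. A standalone helper that computes the same split (range_model,
// SplitRange and kCellModel are hypothetical; kCellModel stands in for
// kShadowCell):
#include <cstdint>

namespace range_model {
constexpr uintptr_t kCellModel = 8;

struct Split {
  uintptr_t head;    // bytes before the first cell boundary (may be 0)
  uintptr_t middle;  // bytes covered by whole cells
  uintptr_t tail;    // leftover bytes after the last whole cell (may be 0)
};

inline Split SplitRange(uintptr_t addr, uintptr_t size) {
  Split s{};
  uintptr_t head = addr % kCellModel ? kCellModel - addr % kCellModel : 0;
  if (head > size) head = size;
  s.head = head;
  uintptr_t rest = size - head;
  s.middle = rest - rest % kCellModel;
  s.tail = rest % kCellModel;
  return s;
}
}  // namespace range_model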
+ uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr); + size -= size1; + Shadow cur(fast_state, addr, size1, typ); + if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) + return; shadow_mem += kShadowCnt; + } // Handle middle part, if any. - for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) { - int const kAccessSizeLog = 3; - Shadow cur(fast_state); - cur.SetWrite(is_write); - cur.SetAddr0AndSizeLog(0, kAccessSizeLog); - MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, - cur); - shadow_mem += kShadowCnt; + Shadow cur(fast_state, 0, kShadowCell, typ); + for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) { + if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) + return; } // Handle ending, if any. - for (; size; addr++, size--) { - int const kAccessSizeLog = 0; - Shadow cur(fast_state); - cur.SetWrite(is_write); - cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog); - MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, - cur); + if (UNLIKELY(size)) { + Shadow cur(fast_state, 0, size, typ); + if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) + return; } } +template void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, + uptr size); +template void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, + uptr size); + } // namespace __tsan #if !SANITIZER_GO diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp @@ -23,6 +23,8 @@ namespace __tsan { void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r); +void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr, + FastState last_lock, StackID creation_stack_id); struct Callback final : public DDCallback { ThreadState *thr; @@ -36,17 +38,17 @@ } StackID Unwind() override { return CurrentStackId(thr, pc); } - int UniqueTid() override { return thr->unique_id; } + int UniqueTid() override { return thr->tid; } }; void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s) { Callback cb(thr, pc); ctx->dd->MutexInit(&cb, &s->dd); - s->dd.ctx = s->GetId(); + s->dd.ctx = s->addr; } static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ, - uptr addr, u64 mid) { + uptr addr, StackID creation_stack_id) { // In Go, these misuses are either impossible, or detected by std lib, // or false positives (e.g. unlock in a different thread). if (SANITIZER_GO) @@ -55,7 +57,7 @@ return; ThreadRegistryLock l(&ctx->thread_registry); ScopedReport rep(typ); - rep.AddMutex(mid); + rep.AddMutex(addr, creation_stack_id); VarSizeStackTrace trace; ObtainCurrentStack(thr, pc, &trace); rep.AddStack(trace, true); @@ -63,95 +65,93 @@ OutputReport(thr, rep); } +static void RecordMutexLock(ThreadState *thr, uptr pc, uptr addr, + StackID stack_id, bool write) { + auto typ = write ? EventType::kLock : EventType::kRLock; + // Note: it's important to trace before modifying mutex set + // because tracing can switch trace part and we write the current + // mutex set in the beginning of each part. + // If we do it in the opposite order, we will write already reduced + // mutex set in the beginning of the part and then trace unlock again. + TraceMutexLock(thr, typ, pc, addr, stack_id); + thr->mset.AddAddr(addr, stack_id, write); +} + +static void RecordMutexUnlock(ThreadState *thr, uptr addr) { + // See the comment in RecordMutexLock re order of operations. 
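  // (Concretely for the unlock case: if mset.DelAddr ran before
  //  TraceMutexUnlock and tracing switched to a new trace part, that part
  //  would begin with the already-reduced mutex set and still contain the
  //  unlock event, so replay would effectively see the unlock twice.)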
+ TraceMutexUnlock(thr, addr); + thr->mset.DelAddr(addr); +} + void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexCreate %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (!(flagz & MutexFlagLinkerInit) && IsAppMem(addr)) { - CHECK(!thr->is_freeing); - thr->is_freeing = true; + if (!(flagz & MutexFlagLinkerInit) && pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessWrite); - thr->is_freeing = false; - } - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); s->SetFlags(flagz & MutexCreationFlagMask); // Save stack in the case the sync object was created before as atomic. - if (!SANITIZER_GO && s->creation_stack_id == 0) + if (!SANITIZER_GO && s->creation_stack_id == kInvalidStackID) s->creation_stack_id = CurrentStackId(thr, pc); } void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr); bool unlock_locked = false; - u64 mid = 0; - u64 last_lock = 0; + StackID creation_stack_id; + FastState last_lock; { - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); - if (s == 0) + auto s = ctx->metamap.GetSyncIfExists(addr); + if (!s) return; - Lock l(&s->mtx); - if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) || - ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) { - // Destroy is no-op for linker-initialized mutexes. - return; - } - if (common_flags()->detect_deadlocks) { - Callback cb(thr, pc); - ctx->dd->MutexDestroy(&cb, &s->dd); - ctx->dd->MutexInit(&cb, &s->dd); - } - if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid && - !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - unlock_locked = true; - } - mid = s->GetId(); - last_lock = s->last_lock; - if (!unlock_locked) - s->Reset(thr->proc()); // must not reset it before the report is printed - } - if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) { - ThreadRegistryLock l(&ctx->thread_registry); - ScopedReport rep(ReportTypeMutexDestroyLocked); - rep.AddMutex(mid); - VarSizeStackTrace trace; - ObtainCurrentStack(thr, pc, &trace); - rep.AddStack(trace, true); - FastState last(last_lock); - RestoreStack(last.tid(), last.epoch(), &trace, 0); - rep.AddStack(trace, true); - rep.AddLocation(addr, 1); - OutputReport(thr, rep); - - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); - if (s != 0) { - Lock l(&s->mtx); - s->Reset(thr->proc()); + SlotLocker locker(thr); + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + last_lock = s->last_lock; + if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) || + ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) { + // Destroy is no-op for linker-initialized mutexes. + return; + } + if (common_flags()->detect_deadlocks) { + Callback cb(thr, pc); + ctx->dd->MutexDestroy(&cb, &s->dd); + ctx->dd->MutexInit(&cb, &s->dd); + } + if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid && + !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + unlock_locked = true; + } + s->Reset(); } + // Imitate a memory write to catch unlock-destroy races. + if (pc && IsAppMem(addr)) + MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree); } - thr->mset.Remove(mid); - // Imitate a memory write to catch unlock-destroy races. - // Do this outside of sync mutex, because it can report a race which locks - // sync mutexes. 
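// A simplified sketch (not part of the patch) of the lock nesting the
// rewritten mutex hooks follow: the thread's slot (SlotLocker) is the outer
// lock, the SyncVar mutex is the inner one, and reports are produced only
// after both scopes close, because reporting acquires registry/slot locks of
// its own.
static void MutexOpPattern(ThreadState *thr, uptr pc, uptr addr) {
  bool report = false;
  {
    SlotLocker locker(thr);  // outer: pins the slot/epoch machinery
    auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
    Lock lock(&s->mtx);      // inner: protects SyncVar fields
    // ... update owner_tid/recursion, acquire/release thr->clock,
    //     set report = true on misuse ...
  }
  if (report) {
    // ReportMutexMisuse / ReportDestroyLocked run here, outside both scopes.
  }
}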
- if (IsAppMem(addr)) - MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree); + if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) + ReportDestroyLocked(thr, pc, addr, last_lock, creation_stack_id); + thr->mset.DelAddr(addr, true); // s will be destroyed and freed in MetaMap::FreeBlock. } void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPreLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - { - ReadLock l(&s->mtx); - s->UpdateFlags(flagz); - if (s->owner_tid != thr->tid) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeLock(&cb, &s->dd, true); - } - } - Callback cb(thr, pc); - ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); + if (flagz & MutexFlagTryLock) + return; + if (!common_flags()->detect_deadlocks) + return; + Callback cb(thr, pc); + { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + ReadLock lock(&s->mtx); + s->UpdateFlags(flagz); + if (s->owner_tid != thr->tid) + ctx->dd->MutexBeforeLock(&cb, &s->dd, true); } + ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) { @@ -161,48 +161,51 @@ CHECK_GT(rec, 0); else rec = 1; - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + bool report_double_lock = false; bool pre_lock = false; bool first = false; - bool report_double_lock = false; + StackID creation_stack_id = kInvalidStackID; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - s->UpdateFlags(flagz); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId()); - if (s->owner_tid == kInvalidTid) { - CHECK_EQ(s->recursion, 0); - s->owner_tid = thr->tid; - s->last_lock = thr->fast_state.raw(); - } else if (s->owner_tid == thr->tid) { - CHECK_GT(s->recursion, 0); - } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_double_lock = true; - } - first = s->recursion == 0; - s->recursion += rec; - if (first) { - AcquireImpl(thr, pc, &s->clock); - AcquireImpl(thr, pc, &s->read_clock); - } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) { - } - thr->mset.Add(s->GetId(), true, thr->fast_state.epoch()); - if (first && common_flags()->detect_deadlocks) { - pre_lock = - (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock); - Callback cb(thr, pc); - if (pre_lock) - ctx->dd->MutexBeforeLock(&cb, &s->dd, true); - ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + creation_stack_id = s->creation_stack_id; + RecordMutexLock(thr, pc, addr, creation_stack_id, true); + { + Lock lock(&s->mtx); + first = s->recursion == 0; + s->UpdateFlags(flagz); + if (s->owner_tid == kInvalidTid) { + CHECK_EQ(s->recursion, 0); + s->owner_tid = thr->tid; + s->last_lock = thr->fast_state; + } else if (s->owner_tid == thr->tid) { + CHECK_GT(s->recursion, 0); + } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_double_lock = true; + } + s->recursion += rec; + if (first) { + if (!thr->ignore_sync) { + thr->clock.Acquire(s->clock); + thr->clock.Acquire(s->read_clock); + } + } + if (first && 
common_flags()->detect_deadlocks) { + pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) && + !(flagz & MutexFlagTryLock); + Callback cb(thr, pc); + if (pre_lock) + ctx->dd->MutexBeforeLock(&cb, &s->dd, true); + ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock); + } } - mid = s->GetId(); } if (report_double_lock) - ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, + creation_stack_id); if (first && pre_lock && common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -211,40 +214,47 @@ int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexUnlock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + StackID creation_stack_id; + RecordMutexUnlock(thr, addr); bool report_bad_unlock = false; int rec = 0; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId()); - if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; - } - } else { - rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1; - s->recursion -= rec; - if (s->recursion == 0) { - s->owner_tid = kInvalidTid; - ReleaseStoreImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + bool released = false; + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; + } } else { + rec = (flagz & MutexFlagRecursiveUnlock) ? 
s->recursion : 1; + s->recursion -= rec; + if (s->recursion == 0) { + s->owner_tid = kInvalidTid; + if (!thr->ignore_sync) { + thr->clock.ReleaseStore(&s->clock); + released = true; + } + } + } + if (common_flags()->detect_deadlocks && s->recursion == 0 && + !report_bad_unlock) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true); } } - thr->mset.Del(s->GetId(), true); - if (common_flags()->detect_deadlocks && s->recursion == 0 && - !report_bad_unlock) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true); - } - mid = s->GetId(); + if (released) + IncrementEpoch(thr); } if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, + creation_stack_id); if (common_flags()->detect_deadlocks && !report_bad_unlock) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -254,53 +264,56 @@ void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPreReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) { - { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReadLock l(&s->mtx); - s->UpdateFlags(flagz); - Callback cb(thr, pc); - ctx->dd->MutexBeforeLock(&cb, &s->dd, false); - } - Callback cb(thr, pc); - ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); + if ((flagz & MutexFlagTryLock) || !common_flags()->detect_deadlocks) + return; + Callback cb(thr, pc); + { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + ReadLock lock(&s->mtx); + s->UpdateFlags(flagz); + ctx->dd->MutexBeforeLock(&cb, &s->dd, false); } + ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPostReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; bool report_bad_lock = false; bool pre_lock = false; + StackID creation_stack_id = kInvalidStackID; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReadLock l(&s->mtx); - s->UpdateFlags(flagz); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId()); - if (s->owner_tid != kInvalidTid) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_lock = true; + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + creation_stack_id = s->creation_stack_id; + RecordMutexLock(thr, pc, addr, creation_stack_id, false); + { + ReadLock lock(&s->mtx); + s->UpdateFlags(flagz); + if (s->owner_tid != kInvalidTid) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_lock = true; + } + } + if (!thr->ignore_sync) + thr->clock.Acquire(s->clock); + s->last_lock = thr->fast_state; + if (common_flags()->detect_deadlocks) { + pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) && + !(flagz & MutexFlagTryLock); + Callback cb(thr, pc); + if (pre_lock) + ctx->dd->MutexBeforeLock(&cb, &s->dd, false); + ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock); } } - AcquireImpl(thr, pc, &s->clock); - s->last_lock = thr->fast_state.raw(); - thr->mset.Add(s->GetId(), false, thr->fast_state.epoch()); - if (common_flags()->detect_deadlocks) { - pre_lock = - (flagz & 
MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock); - Callback cb(thr, pc); - if (pre_lock) - ctx->dd->MutexBeforeLock(&cb, &s->dd, false); - ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock); - } - mid = s->GetId(); } if (report_bad_lock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, + creation_stack_id); if (pre_lock && common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -309,31 +322,39 @@ void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexReadUnlock %zx\n", thr->tid, addr); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + RecordMutexUnlock(thr, addr); + StackID creation_stack_id; bool report_bad_unlock = false; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId()); - if (s->owner_tid != kInvalidTid) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + bool released = false; + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + if (s->owner_tid != kInvalidTid) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; + } + } + if (!thr->ignore_sync) { + thr->clock.Release(&s->read_clock); + released = true; + } + if (common_flags()->detect_deadlocks && s->recursion == 0) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false); } } - ReleaseImpl(thr, pc, &s->read_clock); - if (common_flags()->detect_deadlocks && s->recursion == 0) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false); - } - mid = s->GetId(); + if (released) + IncrementEpoch(thr); } - thr->mset.Del(mid, false); if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, + creation_stack_id); if (common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -342,44 +363,52 @@ void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr); - if (IsAppMem(addr)) + if (pc && IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - u64 mid = 0; + RecordMutexUnlock(thr, addr); + StackID creation_stack_id; bool report_bad_unlock = false; + bool write = true; { - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); - bool write = true; - if (s->owner_tid == kInvalidTid) { - // Seems to be read unlock. - write = false; - thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId()); - ReleaseImpl(thr, pc, &s->read_clock); - } else if (s->owner_tid == thr->tid) { - // Seems to be write unlock. 
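// Summary of the vector-clock protocol the hooks above implement for a
// reader-writer mutex s (all clock operations are guarded by
// !thr->ignore_sync, and every release is followed by IncrementEpoch):
//
//   write-lock:    thr->clock.Acquire(s->clock);        // see prior writers
//                  thr->clock.Acquire(s->read_clock);   // and prior readers
//   write-unlock:  thr->clock.ReleaseStore(&s->clock);  // publish to all
//   read-lock:     thr->clock.Acquire(s->clock);        // see prior writers
//   read-unlock:   thr->clock.Release(&s->read_clock);  // accumulate readers
//
// Readers acquire only s->clock, so two readers do not synchronize with each
// other; a later writer acquires both clocks and therefore sees all of them.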
- thr->fast_state.IncrementEpoch(); - TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId()); - CHECK_GT(s->recursion, 0); - s->recursion--; - if (s->recursion == 0) { - s->owner_tid = kInvalidTid; - ReleaseStoreImpl(thr, pc, &s->clock); - } else { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + bool released = false; + { + Lock lock(&s->mtx); + creation_stack_id = s->creation_stack_id; + if (s->owner_tid == kInvalidTid) { + // Seems to be read unlock. + write = false; + if (!thr->ignore_sync) { + thr->clock.Release(&s->read_clock); + released = true; + } + } else if (s->owner_tid == thr->tid) { + // Seems to be write unlock. + CHECK_GT(s->recursion, 0); + s->recursion--; + if (s->recursion == 0) { + s->owner_tid = kInvalidTid; + if (!thr->ignore_sync) { + thr->clock.ReleaseStore(&s->clock); + released = true; + } + } + } else if (!s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; + } + if (common_flags()->detect_deadlocks && s->recursion == 0) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write); } - } else if (!s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; - } - thr->mset.Del(s->GetId(), write); - if (common_flags()->detect_deadlocks && s->recursion == 0) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write); } - mid = s->GetId(); + if (released) + IncrementEpoch(thr); } if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, + creation_stack_id); if (common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -388,143 +417,112 @@ void MutexRepair(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexRepair %zx\n", thr->tid, addr); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock l(&s->mtx); + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock lock(&s->mtx); s->owner_tid = kInvalidTid; s->recursion = 0; } void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexInvalidAccess %zx\n", thr->tid, addr); - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, s->GetId()); + StackID creation_stack_id = kInvalidStackID; + { + SlotLocker locker(thr); + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + if (s) + creation_stack_id = s->creation_stack_id; + } + ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, + creation_stack_id); } void Acquire(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: Acquire %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); + auto s = ctx->metamap.GetSyncIfExists(addr); if (!s) return; - ReadLock l(&s->mtx); - AcquireImpl(thr, pc, &s->clock); -} - -static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) { - ThreadState *thr = reinterpret_cast(arg); - ThreadContext *tctx = static_cast(tctx_base); - u64 epoch = tctx->epoch1; - if (tctx->status == ThreadStatusRunning) { - epoch = tctx->thr->fast_state.epoch(); - tctx->thr->clock.NoteGlobalAcquire(epoch); - } - thr->clock.set(&thr->proc()->clock_cache, tctx->tid, epoch); + SlotLocker locker(thr); + if (!s->clock) + return; + ReadLock lock(&s->mtx); + thr->clock.Acquire(s->clock); } void AcquireGlobal(ThreadState *thr) { 
DPrintf("#%d: AcquireGlobal\n", thr->tid); if (thr->ignore_sync) return; - ThreadRegistryLock l(&ctx->thread_registry); - ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateClockCallback, thr); -} - -void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) { - DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr); - if (thr->ignore_sync) - return; - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseStoreAcquireImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + for (auto &slot : ctx->slots) thr->clock.Set(slot.sid, slot.epoch()); } void Release(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: Release %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseImpl(thr, pc, &s->clock); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock lock(&s->mtx); + thr->clock.Release(&s->clock); + } + IncrementEpoch(thr); } void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock l(&s->mtx); - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseStoreImpl(thr, pc, &s->clock); -} - -#if !SANITIZER_GO -static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) { - ThreadState *thr = reinterpret_cast(arg); - ThreadContext *tctx = static_cast(tctx_base); - u64 epoch = tctx->epoch1; - if (tctx->status == ThreadStatusRunning) - epoch = tctx->thr->fast_state.epoch(); - thr->last_sleep_clock.set(&thr->proc()->clock_cache, tctx->tid, epoch); -} - -void AfterSleep(ThreadState *thr, uptr pc) { - DPrintf("#%d: AfterSleep\n", thr->tid); - if (thr->ignore_sync) - return; - thr->last_sleep_stack_id = CurrentStackId(thr, pc); - ThreadRegistryLock l(&ctx->thread_registry); - ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateSleepClockCallback, - thr); -} -#endif - -void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) { - if (thr->ignore_sync) - return; - thr->clock.set(thr->fast_state.epoch()); - thr->clock.acquire(&thr->proc()->clock_cache, c); -} - -void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) { - if (thr->ignore_sync) - return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - thr->clock.releaseStoreAcquire(&thr->proc()->clock_cache, c); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock lock(&s->mtx); + thr->clock.ReleaseStore(&s->clock); + } + IncrementEpoch(thr); } -void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) { +void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) { + DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - thr->clock.release(&thr->proc()->clock_cache, c); + SlotLocker locker(thr); + { + auto s = ctx->metamap.GetSyncOrCreate(thr, pc, 
addr, false); + Lock lock(&s->mtx); + thr->clock.ReleaseStoreAcquire(&s->clock); + } + IncrementEpoch(thr); } -void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) { - if (thr->ignore_sync) - return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - thr->clock.ReleaseStore(&thr->proc()->clock_cache, c); +void IncrementEpoch(ThreadState *thr) { + DCHECK(!thr->ignore_sync); + DCHECK(thr->slot_locked); + Epoch epoch = EpochInc(thr->fast_state.epoch()); + if (!EpochOverflow(epoch)) { + Sid sid = thr->fast_state.sid(); + thr->clock.Set(sid, epoch); + thr->fast_state.SetEpoch(epoch); + thr->slot->SetEpoch(epoch); + TraceTime(thr); + } } -void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) { +#if !SANITIZER_GO +void AfterSleep(ThreadState *thr, uptr pc) { + DPrintf("#%d: AfterSleep\n", thr->tid); if (thr->ignore_sync) return; - thr->clock.set(thr->fast_state.epoch()); - thr->fast_synch_epoch = thr->fast_state.epoch(); - thr->clock.acq_rel(&thr->proc()->clock_cache, c); + thr->last_sleep_stack_id = CurrentStackId(thr, pc); + thr->last_sleep_clock.Reset(); + SlotLocker locker(thr); + for (auto &slot : ctx->slots) + thr->last_sleep_clock.Set(slot.sid, slot.epoch()); } +#endif void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock)) @@ -532,7 +530,7 @@ ThreadRegistryLock l(&ctx->thread_registry); ScopedReport rep(ReportTypeDeadlock); for (int i = 0; i < r->n; i++) { - rep.AddMutex(r->loop[i].mtx_ctx0); + rep.AddMutex(r->loop[i].mtx_ctx0, r->loop[i].stk[0]); rep.AddUniqueTid((int)r->loop[i].thr_ctx); rep.AddThread((int)r->loop[i].thr_ctx); } @@ -540,7 +538,7 @@ for (int i = 0; i < r->n; i++) { for (int j = 0; j < (flags()->second_deadlock_stack ? 
2 : 1); j++) { u32 stk = r->loop[i].stk[j]; - if (stk && stk != 0xffffffff) { + if (stk && stk != kInvalidStackID) { rep.AddStack(StackDepotGet(stk), true); } else { // Sometimes we fail to extract the stack trace (FIXME: investigate), @@ -552,4 +550,26 @@ OutputReport(thr, rep); } +void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr, + FastState last_lock, StackID creation_stack_id) { + SlotPairLocker locker(thr, last_lock.sid()); + ThreadRegistryLock l0(&ctx->thread_registry); + Lock slots_lock(&ctx->slot_mtx); + ScopedReport rep(ReportTypeMutexDestroyLocked); + rep.AddMutex(addr, creation_stack_id); + VarSizeStackTrace trace; + ObtainCurrentStack(thr, pc, &trace); + rep.AddStack(trace, true); + + Tid tid; + DynamicMutexSet mset; + uptr tag; + if (!RestoreStack(EventType::kLock, last_lock.sid(), last_lock.epoch(), addr, + 0, kAccessWrite, &tid, &trace, mset, &tag)) + return; + rep.AddStack(trace, true); + rep.AddLocation(addr, 1); + OutputReport(thr, rep); +} + } // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp @@ -35,7 +35,6 @@ #if !SANITIZER_GO AllocatorProcFinish(proc); #endif - ctx->clock_alloc.FlushCache(&proc->clock_cache); ctx->metamap.OnProcIdle(proc); if (common_flags()->detect_deadlocks) ctx->dd->DestroyPhysicalThread(proc->dd_pt); diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp @@ -175,22 +175,26 @@ } void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, - StackTrace stack, const MutexSet *mset) { + Tid tid, StackTrace stack, + const MutexSet *mset) { + uptr addr0, size; + AccessType typ; + s.GetAccess(&addr0, &size, &typ); auto *mop = New(); rep_->mops.PushBack(mop); - mop->tid = s.tid(); - mop->addr = addr + s.addr0(); - mop->size = s.size(); - mop->write = s.IsWrite(); - mop->atomic = s.IsAtomic(); + mop->tid = tid; + mop->addr = addr + addr0; + mop->size = size; + mop->write = !(typ & kAccessRead); + mop->atomic = typ & kAccessAtomic; mop->stack = SymbolizeStack(stack); mop->external_tag = external_tag; if (mop->stack) mop->stack->suppressable = true; for (uptr i = 0; i < mset->Size(); i++) { MutexSet::Desc d = mset->Get(i); - u64 mid = this->AddMutex(d.id); - ReportMopMutex mtx = {mid, d.write}; + u64 id = this->AddMutex(d.addr, d.stack_id); + ReportMopMutex mtx = {id, d.write}; mop->mset.PushBack(mtx); } } @@ -219,18 +223,6 @@ } #if !SANITIZER_GO -static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) { - int unique_id = *(int *)arg; - return tctx->unique_id == (u32)unique_id; -} - -static ThreadContext *FindThreadByUidLocked(Tid unique_id) { - ctx->thread_registry.CheckLocked(); - return static_cast( - ctx->thread_registry.FindThreadContextLocked( - FindThreadByUidLockedCallback, &unique_id)); -} - static ThreadContext *FindThreadByTidLocked(Tid tid) { ctx->thread_registry.CheckLocked(); return static_cast( @@ -262,55 +254,25 @@ } #endif -void ScopedReportBase::AddThread(Tid unique_tid, bool suppressable) { +void ScopedReportBase::AddThread(Tid tid, bool suppressable) { #if !SANITIZER_GO - if (const ThreadContext *tctx = FindThreadByUidLocked(unique_tid)) + if (const ThreadContext *tctx = FindThreadByTidLocked(tid)) AddThread(tctx, suppressable); #endif } -void 
ScopedReportBase::AddMutex(const SyncVar *s) { +int ScopedReportBase::AddMutex(uptr addr, StackID creation_stack_id) { for (uptr i = 0; i < rep_->mutexes.Size(); i++) { - if (rep_->mutexes[i]->id == s->uid) - return; + if (rep_->mutexes[i]->addr == addr) + return rep_->mutexes[i]->id; } auto *rm = New(); rep_->mutexes.PushBack(rm); - rm->id = s->uid; - rm->addr = s->addr; + rm->id = rep_->mutexes.Size() - 1; + rm->addr = addr; rm->destroyed = false; - rm->stack = SymbolizeStackId(s->creation_stack_id); -} - -u64 ScopedReportBase::AddMutex(u64 id) { - u64 uid = 0; - u64 mid = id; - uptr addr = SyncVar::SplitId(id, &uid); - SyncVar *s = ctx->metamap.GetSyncIfExists(addr); - // Check that the mutex is still alive. - // Another mutex can be created at the same address, - // so check uid as well. - if (s && s->CheckId(uid)) { - Lock l(&s->mtx); - mid = s->uid; - AddMutex(s); - } else { - AddDeadMutex(id); - } - return mid; -} - -void ScopedReportBase::AddDeadMutex(u64 id) { - for (uptr i = 0; i < rep_->mutexes.Size(); i++) { - if (rep_->mutexes[i]->id == id) - return; - } - auto *rm = New(); - rep_->mutexes.PushBack(rm); - rm->id = id; - rm->addr = 0; - rm->destroyed = true; - rm->stack = 0; + rm->stack = SymbolizeStackId(creation_stack_id); + return rm->id; } void ScopedReportBase::AddLocation(uptr addr, uptr size) { @@ -327,7 +289,7 @@ loc->tid = creat_tid; loc->stack = SymbolizeStackId(creat_stack); rep_->locs.PushBack(loc); - ThreadContext *tctx = FindThreadByUidLocked(creat_tid); + ThreadContext *tctx = FindThreadByTidLocked(creat_tid); if (tctx) AddThread(tctx); return; @@ -343,16 +305,15 @@ if (!b) b = JavaHeapBlock(addr, &block_begin); if (b != 0) { - ThreadContext *tctx = FindThreadByTidLocked(b->tid); auto *loc = New(); loc->type = ReportLocationHeap; loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr); loc->heap_chunk_size = b->siz; loc->external_tag = b->tag; - loc->tid = tctx ? tctx->tid : b->tid; + loc->tid = b->tid; loc->stack = SymbolizeStackId(b->stk); rep_->locs.PushBack(loc); - if (tctx) + if (ThreadContext *tctx = FindThreadByTidLocked(b->tid)) AddThread(tctx); return; } @@ -387,71 +348,6 @@ ScopedReport::~ScopedReport() {} -void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk, - MutexSet *mset, uptr *tag) { - // This function restores stack trace and mutex set for the thread/epoch. - // It does so by getting stack trace and mutex set at the beginning of - // trace part, and then replaying the trace till the given epoch. 
- Trace* trace = ThreadTrace(tid); - ReadLock l(&trace->mtx); - const int partidx = (epoch / kTracePartSize) % TraceParts(); - TraceHeader* hdr = &trace->headers[partidx]; - if (epoch < hdr->epoch0 || epoch >= hdr->epoch0 + kTracePartSize) - return; - CHECK_EQ(RoundDown(epoch, kTracePartSize), hdr->epoch0); - const u64 epoch0 = RoundDown(epoch, TraceSize()); - const u64 eend = epoch % TraceSize(); - const u64 ebegin = RoundDown(eend, kTracePartSize); - DPrintf("#%d: RestoreStack epoch=%zu ebegin=%zu eend=%zu partidx=%d\n", - tid, (uptr)epoch, (uptr)ebegin, (uptr)eend, partidx); - Vector stack; - stack.Resize(hdr->stack0.size + 64); - for (uptr i = 0; i < hdr->stack0.size; i++) { - stack[i] = hdr->stack0.trace[i]; - DPrintf2(" #%02zu: pc=%zx\n", i, stack[i]); - } - if (mset) - *mset = hdr->mset0; - uptr pos = hdr->stack0.size; - Event *events = (Event*)GetThreadTrace(tid); - for (uptr i = ebegin; i <= eend; i++) { - Event ev = events[i]; - EventType typ = (EventType)(ev >> kEventPCBits); - uptr pc = (uptr)(ev & ((1ull << kEventPCBits) - 1)); - DPrintf2(" %zu typ=%d pc=%zx\n", i, typ, pc); - if (typ == EventTypeMop) { - stack[pos] = pc; - } else if (typ == EventTypeFuncEnter) { - if (stack.Size() < pos + 2) - stack.Resize(pos + 2); - stack[pos++] = pc; - } else if (typ == EventTypeFuncExit) { - if (pos > 0) - pos--; - } - if (mset) { - if (typ == EventTypeLock) { - mset->Add(pc, true, epoch0 + i); - } else if (typ == EventTypeUnlock) { - mset->Del(pc, true); - } else if (typ == EventTypeRLock) { - mset->Add(pc, false, epoch0 + i); - } else if (typ == EventTypeRUnlock) { - mset->Del(pc, false); - } - } - for (uptr j = 0; j <= pos; j++) - DPrintf2(" #%zu: %zx\n", j, stack[j]); - } - if (pos == 0 && stack[0] == 0) - return; - pos++; - stk->Init(&stack[0], pos); - ExtractTagFromStack(stk, tag); -} - -namespace v3 { - // Replays the trace up to last_pos position in the last part // or up to the provided epoch/sid (whichever is earlier) // and calls the provided function f for each event. @@ -469,6 +365,7 @@ Event *end = &part->events[TracePart::kSize - 1]; if (part == last) end = last_pos; + f(kFreeSid, kEpochOver, nullptr); // notify about part start for (Event *evp = &part->events[0]; evp < end; evp++) { Event *evp0 = evp; if (!evp->is_access && !evp->is_func) { @@ -528,21 +425,36 @@ return addr1 >= addr2 && addr1 + size1 <= addr2 + size2; } -// Replays the trace of thread tid up to the target event identified -// by sid/epoch/addr/size/typ and restores and returns stack, mutex set +// Replays the trace of slot sid up to the target event identified +// by epoch/addr/size/typ and restores and returns tid, stack, mutex set // and tag for that event. If there are multiple such events, it returns // the last one. Returns false if the event is not present in the trace. -bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, - uptr size, AccessType typ, VarSizeStackTrace *pstk, +bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, + AccessType typ, Tid *ptid, VarSizeStackTrace *pstk, MutexSet *pmset, uptr *ptag) { // This function restores stack trace and mutex set for the thread/epoch. // It does so by getting stack trace and mutex set at the beginning of // trace part, and then replaying the trace till the given epoch. 
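// Roughly, the restore below proceeds in three steps:
//  1) map (sid, epoch) to the thread that owned the slot at that time via
//     the slot journal,
//  2) snapshot that thread's trace parts and final position under trace->mtx,
//  3) replay the parts with TraceReplay, resetting stack/mset/prev_pc at each
//     part-start notification (the nullptr event), until the matching access
//     or lock event is reached.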
- DPrintf2("RestoreStack: tid=%u sid=%u@%u addr=0x%zx/%zu typ=%x\n", tid, + DPrintf2("RestoreStack: sid=%u@%u addr=0x%zx/%zu typ=%x\n", static_cast(sid), static_cast(epoch), addr, size, static_cast(typ)); ctx->slot_mtx.CheckLocked(); // needed to prevent trace part recycling ctx->thread_registry.CheckLocked(); + TidSlot *slot = &ctx->slots[static_cast(sid)]; + Tid tid = kInvalidTid; + // Need to lock the slot mutex as it protects slot->journal. + slot->mtx.CheckLocked(); + for (uptr i = 0; i < slot->journal.Size(); i++) { + DPrintf2(" journal: epoch=%d tid=%d\n", + static_cast(slot->journal[i].epoch), slot->journal[i].tid); + if (i == slot->journal.Size() - 1 || slot->journal[i + 1].epoch > epoch) { + tid = slot->journal[i].tid; + break; + } + } + if (tid == kInvalidTid) + return false; + *ptid = tid; ThreadContext *tctx = static_cast(ctx->thread_registry.GetThreadLocked(tid)); Trace *trace = &tctx->trace; @@ -553,8 +465,10 @@ { Lock lock(&trace->mtx); first_part = trace->parts.Front(); - if (!first_part) + if (!first_part) { + DPrintf2("RestoreStack: tid=%d trace=%p no trace parts\n", tid, trace); return false; + } last_part = trace->parts.Back(); last_pos = trace->final_pos; if (tctx->thr) @@ -567,9 +481,18 @@ bool is_read = typ & kAccessRead; bool is_atomic = typ & kAccessAtomic; bool is_free = typ & kAccessFree; + DPrintf2("RestoreStack: tid=%d parts=[%p-%p] last_pos=%p\n", tid, + trace->parts.Front(), last_part, last_pos); TraceReplay( trace, last_part, last_pos, sid, epoch, [&](Sid ev_sid, Epoch ev_epoch, Event *evp) { + if (evp == nullptr) { + // Each trace part is self-consistent, so we reset state. + stack.Resize(0); + mset->Reset(); + prev_pc = 0; + return; + } bool match = ev_sid == sid && ev_epoch == epoch; if (evp->is_access) { if (evp->is_func == 0 && evp->type == EventType::kAccessExt && @@ -592,12 +515,15 @@ if (evp->is_func) { auto *ev = reinterpret_cast(evp); if (ev->pc) { - DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc); + DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc); stack.PushBack(ev->pc); } else { - DPrintf2(" FuncExit\n"); - CHECK(stack.Size()); - stack.PopBack(); + DPrintf2(" FuncExit\n"); + // We don't log pathologically large stacks in each part, + // if the stack was truncated we can have more func exits than + // entries. + if (stack.Size()) + stack.PopBack(); } return; } @@ -666,8 +592,6 @@ return found; } -} // namespace v3 - bool RacyStacks::operator==(const RacyStacks &other) const { if (hash[0] == other.hash[0] && hash[1] == other.hash[1]) return true; @@ -758,10 +682,7 @@ ctx->fired_suppressions.push_back(s); } { - bool old_is_freeing = thr->is_freeing; - thr->is_freeing = false; bool suppressed = OnReport(rep, pc_or_addr != 0); - thr->is_freeing = old_is_freeing; if (suppressed) { thr->current_report = nullptr; return false; @@ -808,97 +729,91 @@ return false; } -static bool RaceBetweenAtomicAndFree(ThreadState *thr) { - Shadow s0(thr->racy_state[0]); - Shadow s1(thr->racy_state[1]); - CHECK(!(s0.IsAtomic() && s1.IsAtomic())); - if (!s0.IsAtomic() && !s1.IsAtomic()) - return true; - if (s0.IsAtomic() && s1.IsFreed()) - return true; - if (s1.IsAtomic() && thr->is_freeing) - return true; - return false; +// We need to lock the target slot during RestoreStack because it protects +// the slot journal. However, the target slot can be the slot of the current +// thread or a different slot. 
+SlotPairLocker::SlotPairLocker(ThreadState *thr, + Sid sid) NO_THREAD_SAFETY_ANALYSIS : thr_(thr), + slot_() { + CHECK_NE(sid, kFreeSid); + Lock l(&ctx->multi_slot_mtx); + SlotLock(thr); + if (sid == thr->slot->sid) + return; + slot_ = &ctx->slots[static_cast(sid)]; + slot_->mtx.Lock(); } -void ReportRace(ThreadState *thr) { +SlotPairLocker::~SlotPairLocker() NO_THREAD_SAFETY_ANALYSIS { + SlotUnlock(thr_); + if (slot_) + slot_->mtx.Unlock(); +} + +void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old, + AccessType typ0) { CheckedMutex::CheckNoLocks(); // Symbolizer makes lots of intercepted calls. If we try to process them, // at best it will cause deadlocks on internal mutexes. ScopedIgnoreInterceptors ignore; + uptr addr = ShadowToMem(shadow_mem); + DPrintf("#%d: ReportRace %p\n", thr->tid, (void *)addr); if (!ShouldReport(thr, ReportTypeRace)) return; - if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr)) + uptr addr_off0, size0; + cur.GetAccess(&addr_off0, &size0, nullptr); + uptr addr_off1, size1, typ1; + old.GetAccess(&addr_off1, &size1, &typ1); + if (!flags()->report_atomic_races && + ((typ0 & kAccessAtomic) || (typ1 & kAccessAtomic)) && + !(typ0 & kAccessFree) && !(typ1 & kAccessFree)) return; - bool freed = false; - { - Shadow s(thr->racy_state[1]); - freed = s.GetFreedAndReset(); - thr->racy_state[1] = s.raw(); - } - - uptr addr = ShadowToMem(thr->racy_shadow_addr); - uptr addr_min = 0; - uptr addr_max = 0; - { - uptr a0 = addr + Shadow(thr->racy_state[0]).addr0(); - uptr a1 = addr + Shadow(thr->racy_state[1]).addr0(); - uptr e0 = a0 + Shadow(thr->racy_state[0]).size(); - uptr e1 = a1 + Shadow(thr->racy_state[1]).size(); - addr_min = min(a0, a1); - addr_max = max(e0, e1); - if (IsExpectedReport(addr_min, addr_max - addr_min)) - return; - } + const uptr kMop = 2; + Shadow s[kMop] = {cur, old}; + uptr addr0 = addr + addr_off0; + uptr addr1 = addr + addr_off1; + uptr end0 = addr0 + size0; + uptr end1 = addr1 + size1; + uptr addr_min = min(addr0, addr1); + uptr addr_max = max(end0, end1); + if (IsExpectedReport(addr_min, addr_max - addr_min)) + return; if (HandleRacyAddress(thr, addr_min, addr_max)) return; - ReportType typ = ReportTypeRace; - if (thr->is_vptr_access && freed) - typ = ReportTypeVptrUseAfterFree; - else if (thr->is_vptr_access) - typ = ReportTypeVptrRace; - else if (freed) - typ = ReportTypeUseAfterFree; + ReportType rep_typ = ReportTypeRace; + if ((typ0 & kAccessVptr) && (typ1 & kAccessFree)) + rep_typ = ReportTypeVptrUseAfterFree; + else if (typ0 & kAccessVptr) + rep_typ = ReportTypeVptrRace; + else if (typ1 & kAccessFree) + rep_typ = ReportTypeUseAfterFree; - if (IsFiredSuppression(ctx, typ, addr)) + if (IsFiredSuppression(ctx, rep_typ, addr)) return; - const uptr kMop = 2; VarSizeStackTrace traces[kMop]; - uptr tags[kMop] = {kExternalTagNone}; - uptr toppc = TraceTopPC(thr); - if (toppc >> kEventPCBits) { - // This is a work-around for a known issue. - // The scenario where this happens is rather elaborate and requires - // an instrumented __sanitizer_report_error_summary callback and - // a __tsan_symbolize_external callback and a race during a range memory - // access larger than 8 bytes. MemoryAccessRange adds the current PC to - // the trace and starts processing memory accesses. A first memory access - // triggers a race, we report it and call the instrumented - // __sanitizer_report_error_summary, which adds more stuff to the trace - // since it is intrumented. 
Then a second memory access in MemoryAccessRange - // also triggers a race and we get here and call TraceTopPC to get the - // current PC, however now it contains some unrelated events from the - // callback. Most likely, TraceTopPC will now return a EventTypeFuncExit - // event. Later we subtract -1 from it (in GetPreviousInstructionPc) - // and the resulting PC has kExternalPCBit set, so we pass it to - // __tsan_symbolize_external_ex. __tsan_symbolize_external_ex is within its - // rights to crash since the PC is completely bogus. - // test/tsan/double_race.cpp contains a test case for this. - toppc = 0; - } - ObtainCurrentStack(thr, toppc, &traces[0], &tags[0]); - if (IsFiredSuppression(ctx, typ, traces[0])) + Tid tids[kMop] = {thr->tid, kInvalidTid}; + uptr tags[kMop] = {kExternalTagNone, kExternalTagNone}; + + ObtainCurrentStack(thr, thr->trace_prev_pc, &traces[0], &tags[0]); + if (IsFiredSuppression(ctx, rep_typ, traces[0])) + return; + + DynamicMutexSet mset1; + MutexSet *mset[kMop] = {&thr->mset, mset1}; + + SlotPairLocker locker(thr, s[1].sid()); + ThreadRegistryLock l0(&ctx->thread_registry); + Lock slots_lock(&ctx->slot_mtx); + if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1, + size1, typ1, &tids[1], &traces[1], mset[1], &tags[1])) return; - DynamicMutexSet mset2; - Shadow s2(thr->racy_state[1]); - RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]); - if (IsFiredSuppression(ctx, typ, traces[1])) + if (IsFiredSuppression(ctx, rep_typ, traces[1])) return; if (HandleRacyStacks(thr, traces)) @@ -908,39 +823,29 @@ uptr tag = kExternalTagNone; for (uptr i = 0; i < kMop; i++) { if (tags[i] != kExternalTagNone) { - typ = ReportTypeExternalRace; + rep_typ = ReportTypeExternalRace; tag = tags[i]; break; } } - ThreadRegistryLock l0(&ctx->thread_registry); - ScopedReport rep(typ, tag); - for (uptr i = 0; i < kMop; i++) { - Shadow s(thr->racy_state[i]); - rep.AddMemoryAccess(addr, tags[i], s, traces[i], - i == 0 ? &thr->mset : mset2); - } + ScopedReport rep(rep_typ, tag); + for (uptr i = 0; i < kMop; i++) + rep.AddMemoryAccess(addr, tags[i], s[i], tids[i], traces[i], mset[i]); for (uptr i = 0; i < kMop; i++) { - FastState s(thr->racy_state[i]); ThreadContext *tctx = static_cast( - ctx->thread_registry.GetThreadLocked(s.tid())); - if (s.epoch() < tctx->epoch0 || s.epoch() > tctx->epoch1) - continue; + ctx->thread_registry.GetThreadLocked(tids[i])); rep.AddThread(tctx); } rep.AddLocation(addr_min, addr_max - addr_min); #if !SANITIZER_GO - { - Shadow s(thr->racy_state[1]); - if (s.epoch() <= thr->last_sleep_clock.get(s.tid())) - rep.AddSleep(thr->last_sleep_stack_id); - } + if (!((typ0 | typ1) & kAccessFree) && + s[1].epoch() <= thr->last_sleep_clock.Get(s[1].sid())) + rep.AddSleep(thr->last_sleep_stack_id); #endif - OutputReport(thr, rep); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp @@ -21,20 +21,14 @@ // ThreadContext implementation. -ThreadContext::ThreadContext(Tid tid) - : ThreadContextBase(tid), thr(), sync(), epoch0(), epoch1() {} +ThreadContext::ThreadContext(Tid tid) : ThreadContextBase(tid), thr(), sync() {} #if !SANITIZER_GO ThreadContext::~ThreadContext() { } #endif -void ThreadContext::OnReset() { - CHECK_EQ(sync.size(), 0); - uptr trace_p = GetThreadTrace(tid); - ReleaseMemoryPagesToOS(trace_p, trace_p + TraceSize() * sizeof(Event)); - //!!! 
ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace)); -} +void ThreadContext::OnReset() { CHECK(!sync); } #if !SANITIZER_GO struct ThreadLeak { @@ -112,30 +106,35 @@ } struct OnCreatedArgs { - ThreadState *thr; - uptr pc; + VectorClock *sync; + uptr sync_epoch; + StackID stack; }; Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) { - OnCreatedArgs args = { thr, pc }; - u32 parent_tid = thr ? thr->tid : kInvalidTid; // No parent for GCD workers. - Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent_tid, &args); - DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid); + // The main thread and GCD workers don't have a parent thread. + Tid parent = kInvalidTid; + OnCreatedArgs arg = {nullptr, 0, kInvalidStackID}; + if (thr) { + parent = thr->tid; + arg.stack = CurrentStackId(thr, pc); + if (!thr->ignore_sync) { + SlotLocker locker(thr); + thr->clock.ReleaseStore(&arg.sync); + arg.sync_epoch = ctx->global_epoch; + IncrementEpoch(thr); + } + } + Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent, &arg); + DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent, tid, uid); return tid; } void ThreadContext::OnCreated(void *arg) { - thr = 0; - if (tid == kMainTid) - return; OnCreatedArgs *args = static_cast(arg); - if (!args->thr) // GCD workers don't have a parent thread. - return; - args->thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0); - ReleaseImpl(args->thr, 0, &sync); - creation_stack_id = CurrentStackId(args->thr, args->pc); + sync = args->sync; + sync_epoch = args->sync_epoch; + creation_stack_id = args->stack; } extern "C" void __tsan_stack_initialization() {} @@ -150,6 +149,15 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, ThreadType thread_type) { + ctx->thread_registry.StartThread(tid, os_id, thread_type, thr); + if (!thr->ignore_sync) { + SlotAttachAndLock(thr); + if (thr->tctx->sync_epoch == ctx->global_epoch) + thr->clock.Acquire(thr->tctx->sync); + SlotUnlock(thr); + } + Free(thr->tctx->sync); + uptr stk_addr = 0; uptr stk_size = 0; uptr tls_addr = 0; @@ -159,12 +167,10 @@ GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr, &tls_size); #endif - - ThreadRegistry *tr = &ctx->thread_registry; - OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size }; - tr->StartThread(tid, os_id, thread_type, &args); - - while (!thr->tctx->trace.parts.Empty()) thr->tctx->trace.parts.PopBack(); + thr->stk_addr = stk_addr; + thr->stk_size = stk_size; + thr->tls_addr = tls_addr; + thr->tls_size = tls_size; #if !SANITIZER_GO if (ctx->after_multithreaded_fork) { @@ -192,57 +198,41 @@ } void ThreadContext::OnStarted(void *arg) { - OnStartedArgs *args = static_cast(arg); - thr = args->thr; - // RoundUp so that one trace part does not contain events - // from different threads. - epoch0 = RoundUp(epoch1 + 1, kTracePartSize); - epoch1 = (u64)-1; - new (thr) - ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr, - args->stk_size, args->tls_addr, args->tls_size); + thr = static_cast(arg); + DPrintf("#%d: ThreadStart\n", tid); + new (thr) ThreadState(tid); if (common_flags()->detect_deadlocks) - thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id); - thr->fast_state.SetHistorySize(flags()->history_size); - // Commit switch to the new part of the trace. - // TraceAddEvent will reset stack0/mset0 in the new part for us. 
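// Summary of the creation handoff implemented above: the parent publishes
// its clock into tctx->sync together with the global_epoch it was taken at,
// and the child acquires it in ThreadStart only if global_epoch has not
// advanced in between (presumably a global reset invalidates saved clocks):
//
//   parent (ThreadCreate):  thr->clock.ReleaseStore(&arg.sync);
//                           arg.sync_epoch = ctx->global_epoch;
//                           IncrementEpoch(thr);
//   child  (ThreadStart):   if (thr->tctx->sync_epoch == ctx->global_epoch)
//                             thr->clock.Acquire(thr->tctx->sync);
//                           Free(thr->tctx->sync);
//
// ThreadFinish/ThreadJoin below use the same epoch-guarded pattern.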
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - - thr->fast_synch_epoch = epoch0; - AcquireImpl(thr, 0, &sync); - sync.Reset(&thr->proc()->clock_cache); + thr->dd_lt = ctx->dd->CreateLogicalThread(tid); thr->tctx = this; +#if !SANITIZER_GO thr->is_inited = true; - DPrintf( - "#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx " - "tls_addr=%zx tls_size=%zx\n", - tid, (uptr)epoch0, args->stk_addr, args->stk_size, args->tls_addr, - args->tls_size); +#endif } void ThreadFinish(ThreadState *thr) { + DPrintf("#%d: ThreadFinish\n", thr->tid); ThreadCheckIgnore(thr); if (thr->stk_addr && thr->stk_size) DontNeedShadowFor(thr->stk_addr, thr->stk_size); if (thr->tls_addr && thr->tls_size) DontNeedShadowFor(thr->tls_addr, thr->tls_size); thr->is_dead = true; - thr->is_inited = false; #if !SANITIZER_GO + thr->is_inited = false; thr->ignore_interceptors++; + PlatformCleanUpThreadState(thr); #endif - ctx->thread_registry.FinishThread(thr->tid); -} - -void ThreadContext::OnFinished() { - if (!detached) { - thr->fast_state.IncrementEpoch(); - // Can't increment epoch w/o writing to the trace as well. - TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); - ReleaseImpl(thr, 0, &sync); + if (!thr->ignore_sync) { + SlotLocker locker(thr); + ThreadRegistryLock lock(&ctx->thread_registry); + // Note: detached is protected by the thread registry mutex, + // the thread may be detaching concurrently in another thread. + if (!thr->tctx->detached) { + thr->clock.ReleaseStore(&thr->tctx->sync); + thr->tctx->sync_epoch = ctx->global_epoch; + IncrementEpoch(thr); + } } - epoch1 = thr->fast_state.epoch(); - #if !SANITIZER_GO UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr)); #else @@ -251,18 +241,37 @@ thr->shadow_stack = nullptr; thr->shadow_stack_pos = nullptr; thr->shadow_stack_end = nullptr; - if (common_flags()->detect_deadlocks) ctx->dd->DestroyLogicalThread(thr->dd_lt); - thr->clock.ResetCached(&thr->proc()->clock_cache); -#if !SANITIZER_GO - thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache); -#endif -#if !SANITIZER_GO - PlatformCleanUpThreadState(thr); -#endif + SlotDetach(thr); + ctx->thread_registry.FinishThread(thr->tid); thr->~ThreadState(); - thr = 0; +} + +void ThreadContext::OnFinished() { + Lock lock(&ctx->slot_mtx); + Lock lock1(&trace.mtx); + // Queue all trace parts into the global recycle queue. + auto parts = &trace.parts; + while (trace.local_head) { + CHECK(parts->Queued(trace.local_head)); + ctx->trace_part_recycle.PushBack(trace.local_head); + trace.local_head = parts->Next(trace.local_head); + } + ctx->trace_part_recycle_finished += parts->Size(); + if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadHi) { + ctx->trace_part_finished_excess += parts->Size(); + trace.parts_allocated = 0; + } else if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadLo && + parts->Size() > 1) { + ctx->trace_part_finished_excess += parts->Size() - 1; + trace.parts_allocated = 1; + } + // From now on replay will use trace->final_pos. 
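  // (RestoreStack reads the live position from thr->trace_pos only while
  //  tctx->thr is still set; once the thread has finished it falls back to
  //  trace.final_pos, which is frozen by the stores below.)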
+ trace.final_pos = (Event *)atomic_load_relaxed(&thr->trace_pos); + atomic_store_relaxed(&thr->trace_pos, 0); + thr->tctx = nullptr; + thr = nullptr; } struct ConsumeThreadContext { @@ -274,35 +283,43 @@ return ctx->thread_registry.ConsumeThreadUserId(uid); } +struct JoinArg { + VectorClock *sync; + uptr sync_epoch; +}; + void ThreadJoin(ThreadState *thr, uptr pc, Tid tid) { CHECK_GT(tid, 0); - CHECK_LT(tid, kMaxTid); DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid); - ctx->thread_registry.JoinThread(tid, thr); + JoinArg arg = {}; + ctx->thread_registry.JoinThread(tid, &arg); + if (!thr->ignore_sync) { + SlotLocker locker(thr); + if (arg.sync_epoch == ctx->global_epoch) + thr->clock.Acquire(arg.sync); + } + Free(arg.sync); } -void ThreadContext::OnJoined(void *arg) { - ThreadState *caller_thr = static_cast(arg); - AcquireImpl(caller_thr, 0, &sync); - sync.Reset(&caller_thr->proc()->clock_cache); +void ThreadContext::OnJoined(void *ptr) { + auto arg = static_cast(ptr); + arg->sync = sync; + arg->sync_epoch = sync_epoch; + sync = nullptr; + sync_epoch = 0; } -void ThreadContext::OnDead() { CHECK_EQ(sync.size(), 0); } +void ThreadContext::OnDead() { CHECK_EQ(sync, nullptr); } void ThreadDetach(ThreadState *thr, uptr pc, Tid tid) { CHECK_GT(tid, 0); - CHECK_LT(tid, kMaxTid); ctx->thread_registry.DetachThread(tid, thr); } -void ThreadContext::OnDetached(void *arg) { - ThreadState *thr1 = static_cast(arg); - sync.Reset(&thr1->proc()->clock_cache); -} +void ThreadContext::OnDetached(void *arg) { Free(sync); } void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid) { CHECK_GT(tid, 0); - CHECK_LT(tid, kMaxTid); ctx->thread_registry.SetThreadUserId(tid, uid); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_shadow.h b/compiler-rt/lib/tsan/rtl/tsan_shadow.h --- a/compiler-rt/lib/tsan/rtl/tsan_shadow.h +++ b/compiler-rt/lib/tsan/rtl/tsan_shadow.h @@ -10,223 +10,170 @@ #define TSAN_SHADOW_H #include "tsan_defs.h" -#include "tsan_trace.h" namespace __tsan { -// FastState (from most significant bit): -// ignore : 1 -// tid : kTidBits -// unused : - -// history_size : 3 -// epoch : kClkBits class FastState { public: - FastState(u64 tid, u64 epoch) { - x_ = tid << kTidShift; - x_ |= epoch; - DCHECK_EQ(tid, this->tid()); - DCHECK_EQ(epoch, this->epoch()); - DCHECK_EQ(GetIgnoreBit(), false); - } - - explicit FastState(u64 x) : x_(x) {} - - u64 raw() const { return x_; } - - u64 tid() const { - u64 res = (x_ & ~kIgnoreBit) >> kTidShift; - return res; - } - - u64 TidWithIgnore() const { - u64 res = x_ >> kTidShift; - return res; - } - - u64 epoch() const { - u64 res = x_ & ((1ull << kClkBits) - 1); - return res; - } + FastState() { Reset(); } - void IncrementEpoch() { - u64 old_epoch = epoch(); - x_ += 1; - DCHECK_EQ(old_epoch + 1, epoch()); - (void)old_epoch; + void Reset() { + part_.unused0_ = 0; + part_.sid_ = static_cast(kFreeSid); + part_.epoch_ = static_cast(kEpochLast); + part_.unused1_ = 0; + part_.ignore_accesses_ = false; } - void SetIgnoreBit() { x_ |= kIgnoreBit; } - void ClearIgnoreBit() { x_ &= ~kIgnoreBit; } - bool GetIgnoreBit() const { return (s64)x_ < 0; } + void SetSid(Sid sid) { part_.sid_ = static_cast(sid); } - void SetHistorySize(int hs) { - CHECK_GE(hs, 0); - CHECK_LE(hs, 7); - x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift); - } + Sid sid() const { return static_cast(part_.sid_); } - ALWAYS_INLINE - int GetHistorySize() const { - return (int)((x_ >> kHistoryShift) & kHistoryMask); - } + Epoch epoch() const { return static_cast(part_.epoch_); } - void 
ClearHistorySize() { SetHistorySize(0); } + void SetEpoch(Epoch epoch) { part_.epoch_ = static_cast(epoch); } - ALWAYS_INLINE - u64 GetTracePos() const { - const int hs = GetHistorySize(); - // When hs == 0, the trace consists of 2 parts. - const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1; - return epoch() & mask; - } + void SetIgnoreBit() { part_.ignore_accesses_ = 1; } + void ClearIgnoreBit() { part_.ignore_accesses_ = 0; } + bool GetIgnoreBit() const { return part_.ignore_accesses_; } private: friend class Shadow; - static const int kTidShift = 64 - kTidBits - 1; - static const u64 kIgnoreBit = 1ull << 63; - static const u64 kFreedBit = 1ull << 63; - static const u64 kHistoryShift = kClkBits; - static const u64 kHistoryMask = 7; - u64 x_; + struct Parts { + u32 unused0_ : 8; + u32 sid_ : 8; + u32 epoch_ : kEpochBits; + u32 unused1_ : 1; + u32 ignore_accesses_ : 1; + }; + union { + Parts part_; + u32 raw_; + }; }; -// Shadow (from most significant bit): -// freed : 1 -// tid : kTidBits -// is_atomic : 1 -// is_read : 1 -// size_log : 2 -// addr0 : 3 -// epoch : kClkBits -class Shadow : public FastState { - public: - explicit Shadow(u64 x) : FastState(x) {} +static_assert(sizeof(FastState) == kShadowSize, "bad FastState size"); - explicit Shadow(const FastState &s) : FastState(s.x_) { ClearHistorySize(); } - - void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) { - DCHECK_EQ((x_ >> kClkBits) & 31, 0); - DCHECK_LE(addr0, 7); - DCHECK_LE(kAccessSizeLog, 3); - x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits; - DCHECK_EQ(kAccessSizeLog, size_log()); - DCHECK_EQ(addr0, this->addr0()); - } - - void SetWrite(unsigned kAccessIsWrite) { - DCHECK_EQ(x_ & kReadBit, 0); - if (!kAccessIsWrite) - x_ |= kReadBit; - DCHECK_EQ(kAccessIsWrite, IsWrite()); - } - - void SetAtomic(bool kIsAtomic) { - DCHECK(!IsAtomic()); - if (kIsAtomic) - x_ |= kAtomicBit; - DCHECK_EQ(IsAtomic(), kIsAtomic); - } - - bool IsAtomic() const { return x_ & kAtomicBit; } - - bool IsZero() const { return x_ == 0; } - - static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) { - u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift; - DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore()); - return shifted_xor == 0; - } - - static ALWAYS_INLINE bool Addr0AndSizeAreEqual(const Shadow s1, - const Shadow s2) { - u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31; - return masked_xor == 0; +class Shadow { + public: + static constexpr RawShadow kEmpty = static_cast(0); + + Shadow(FastState state, u32 addr, u32 size, AccessType typ) { + raw_ = state.raw_; + DCHECK_GT(size, 0); + DCHECK_LE(size, 8); + UNUSED Sid sid0 = part_.sid_; + UNUSED u16 epoch0 = part_.epoch_; + raw_ |= (!!(typ & kAccessAtomic) << kIsAtomicShift) | + (!!(typ & kAccessRead) << kIsReadShift) | + (((((1u << size) - 1) << (addr & 0x7)) & 0xff) << kAccessShift); + // Note: we don't check kAccessAtomic because it overlaps with + // FastState::ignore_accesses_ and it may be set spuriously. 
+ DCHECK_EQ(part_.is_read_, !!(typ & kAccessRead)); + DCHECK_EQ(sid(), sid0); + DCHECK_EQ(epoch(), epoch0); + } + + explicit Shadow(RawShadow x = Shadow::kEmpty) { raw_ = static_cast(x); } + + RawShadow raw() const { return static_cast(raw_); } + Sid sid() const { return part_.sid_; } + Epoch epoch() const { return static_cast(part_.epoch_); } + u8 access() const { return part_.access_; } + + void GetAccess(uptr *addr, uptr *size, AccessType *typ) const { + DCHECK(part_.access_ != 0 || raw_ == static_cast(Shadow::kRodata)); + if (addr) + *addr = part_.access_ ? __builtin_ffs(part_.access_) - 1 : 0; + if (size) + *size = part_.access_ == kFreeAccess ? kShadowCell + : __builtin_popcount(part_.access_); + if (typ) + *typ = (part_.is_read_ ? kAccessRead : kAccessWrite) | + (part_.is_atomic_ ? kAccessAtomic : 0) | + (part_.access_ == kFreeAccess ? kAccessFree : 0); } - static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2, - unsigned kS2AccessSize) { - bool res = false; - u64 diff = s1.addr0() - s2.addr0(); - if ((s64)diff < 0) { // s1.addr0 < s2.addr0 - // if (s1.addr0() + size1) > s2.addr0()) return true; - if (s1.size() > -diff) - res = true; - } else { - // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true; - if (kS2AccessSize > diff) - res = true; - } - DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2)); - DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1)); + ALWAYS_INLINE + bool IsBothReadsOrAtomic(AccessType typ) const { + u32 is_read = !!(typ & kAccessRead); + u32 is_atomic = !!(typ & kAccessAtomic); + bool res = + raw_ & ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift)); + DCHECK_EQ(res, + (part_.is_read_ && is_read) || (part_.is_atomic_ && is_atomic)); return res; } - u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; } - u64 ALWAYS_INLINE size() const { return 1ull << size_log(); } - bool ALWAYS_INLINE IsWrite() const { return !IsRead(); } - bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; } - - // The idea behind the freed bit is as follows. - // When the memory is freed (or otherwise unaccessible) we write to the shadow - // values with tid/epoch related to the free and the freed bit set. - // During memory accesses processing the freed bit is considered - // as msb of tid. So any access races with shadow with freed bit set - // (it is as if write from a thread with which we never synchronized before). - // This allows us to detect accesses to freed memory w/o additional - // overheads in memory access processing and at the same time restore - // tid/epoch of free. 
- void MarkAsFreed() { x_ |= kFreedBit; } - - bool IsFreed() const { return x_ & kFreedBit; } - - bool GetFreedAndReset() { - bool res = x_ & kFreedBit; - x_ &= ~kFreedBit; + ALWAYS_INLINE + bool IsRWWeakerOrEqual(AccessType typ) const { + u32 is_read = !!(typ & kAccessRead); + u32 is_atomic = !!(typ & kAccessAtomic); + UNUSED u32 res0 = + (part_.is_atomic_ > is_atomic) || + (part_.is_atomic_ == is_atomic && part_.is_read_ >= is_read); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const u32 kAtomicReadMask = (1 << kIsAtomicShift) | (1 << kIsReadShift); + bool res = (raw_ & kAtomicReadMask) >= + ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift)); + + DCHECK_EQ(res, res0); return res; +#else + return res0; +#endif } - bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const { - bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift) | - (u64(kIsAtomic) << kAtomicShift)); - DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic)); - return v; - } - - bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const { - bool v = ((x_ >> kReadShift) & 3) <= u64((kIsWrite ^ 1) | (kIsAtomic << 1)); - DCHECK_EQ(v, (IsAtomic() < kIsAtomic) || - (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite)); - return v; + // The FreedMarker must not pass "the same access check" so that we don't + // return from the race detection algorithm early. + static RawShadow FreedMarker() { + FastState fs; + fs.SetSid(kFreeSid); + fs.SetEpoch(kEpochLast); + Shadow s(fs, 0, 8, kAccessWrite); + return s.raw(); } - bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const { - bool v = ((x_ >> kReadShift) & 3) >= u64((kIsWrite ^ 1) | (kIsAtomic << 1)); - DCHECK_EQ(v, (IsAtomic() > kIsAtomic) || - (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite)); - return v; + static RawShadow FreedInfo(Sid sid, Epoch epoch) { + Shadow s; + s.part_.sid_ = sid; + s.part_.epoch_ = static_cast(epoch); + s.part_.access_ = kFreeAccess; + return s.raw(); } private: - static const u64 kReadShift = 5 + kClkBits; - static const u64 kReadBit = 1ull << kReadShift; - static const u64 kAtomicShift = 6 + kClkBits; - static const u64 kAtomicBit = 1ull << kAtomicShift; - - u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; } - - static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) { - if (s1.addr0() == s2.addr0()) - return true; - if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0()) - return true; - if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0()) - return true; - return false; - } + struct Parts { + u8 access_; + Sid sid_; + u16 epoch_ : kEpochBits; + u16 is_read_ : 1; + u16 is_atomic_ : 1; + }; + union { + Parts part_; + u32 raw_; + }; + + static constexpr u8 kFreeAccess = 0x81; + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + static constexpr uptr kAccessShift = 0; + static constexpr uptr kIsReadShift = 30; + static constexpr uptr kIsAtomicShift = 31; +#else + static constexpr uptr kAccessShift = 24; + static constexpr uptr kIsReadShift = 1; + static constexpr uptr kIsAtomicShift = 0; +#endif + + public: + // .rodata shadow marker, see MapRodata and ContainsSameAccessFast. 
+ static constexpr RawShadow kRodata = + static_cast(1 << kIsReadShift); }; -const RawShadow kShadowRodata = (RawShadow)-1; // .rodata shadow marker +static_assert(sizeof(Shadow) == kShadowSize, "bad Shadow size"); } // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.h b/compiler-rt/lib/tsan/rtl/tsan_sync.h --- a/compiler-rt/lib/tsan/rtl/tsan_sync.h +++ b/compiler-rt/lib/tsan/rtl/tsan_sync.h @@ -15,9 +15,11 @@ #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_deadlock_detector_interface.h" -#include "tsan_defs.h" #include "tsan_clock.h" +#include "tsan_defs.h" #include "tsan_dense_alloc.h" +#include "tsan_shadow.h" +#include "tsan_vector_clock.h" namespace __tsan { @@ -53,34 +55,18 @@ uptr addr; // overwritten by DenseSlabAlloc freelist Mutex mtx; - u64 uid; // Globally unique id. StackID creation_stack_id; Tid owner_tid; // Set only by exclusive owners. - u64 last_lock; + FastState last_lock; int recursion; atomic_uint32_t flags; u32 next; // in MetaMap DDMutex dd; - SyncClock read_clock; // Used for rw mutexes only. - // The clock is placed last, so that it is situated on a different cache line - // with the mtx. This reduces contention for hot sync objects. - SyncClock clock; + VectorClock *read_clock; // Used for rw mutexes only. + VectorClock *clock; - void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, bool save_stack); - void Reset(Processor *proc); - - u64 GetId() const { - // 48 lsb is addr, then 14 bits is low part of uid, then 2 zero bits. - return GetLsb((u64)addr | (uid << 48), 60); - } - bool CheckId(u64 uid) const { - CHECK_EQ(uid, GetLsb(uid, 14)); - return GetLsb(this->uid, 14) == uid; - } - static uptr SplitId(u64 id, u64 *uid) { - *uid = id >> 48; - return (uptr)GetLsb(id, 48); - } + void Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack); + void Reset(); bool IsFlagSet(u32 f) const { return atomic_load_relaxed(&flags) & f; @@ -110,9 +96,20 @@ MetaMap(); void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz); - uptr FreeBlock(Processor *proc, uptr p); - bool FreeRange(Processor *proc, uptr p, uptr sz); - void ResetRange(Processor *proc, uptr p, uptr sz); + + // FreeBlock resets all sync objects in the range if reset=true and must not + // run concurrently with ResetClocks which resets all sync objects + // w/o any synchronization (as part of DoReset). + // If we don't have a thread slot (very early/late in thread lifetime or + // Go/Java callbacks) or the slot is not locked, then reset must be set to + // false. In such case sync object clocks will be reset later (when it's + // reused or during the next ResetClocks). + uptr FreeBlock(Processor *proc, uptr p, bool reset); + bool FreeRange(Processor *proc, uptr p, uptr sz, bool reset); + void ResetRange(Processor *proc, uptr p, uptr sz, bool reset); + // Reset vector clocks of all sync objects. + // Must be called when no other threads access sync objects. 
+ void ResetClocks(); MBlock* GetBlock(uptr p); SyncVar *GetSyncOrCreate(ThreadState *thr, uptr pc, uptr addr, @@ -142,7 +139,6 @@ typedef DenseSlabAlloc SyncAlloc; BlockAlloc block_alloc_; SyncAlloc sync_alloc_; - atomic_uint64_t uid_gen_; SyncVar *GetSync(ThreadState *thr, uptr pc, uptr addr, bool create, bool save_stack); diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.cpp b/compiler-rt/lib/tsan/rtl/tsan_sync.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_sync.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_sync.cpp @@ -18,43 +18,31 @@ void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s); -SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(0); } +SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(); } -void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, - bool save_stack) { +void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack) { + Reset(); this->addr = addr; - this->uid = uid; - this->next = 0; - - creation_stack_id = kInvalidStackID; + next = 0; if (save_stack && !SANITIZER_GO) // Go does not use them creation_stack_id = CurrentStackId(thr, pc); if (common_flags()->detect_deadlocks) DDMutexInit(thr, pc, this); } -void SyncVar::Reset(Processor *proc) { - uid = 0; +void SyncVar::Reset() { + CHECK(!ctx->resetting); creation_stack_id = kInvalidStackID; owner_tid = kInvalidTid; - last_lock = 0; + last_lock.Reset(); recursion = 0; atomic_store_relaxed(&flags, 0); - - if (proc == 0) { - CHECK_EQ(clock.size(), 0); - CHECK_EQ(read_clock.size(), 0); - } else { - clock.Reset(&proc->clock_cache); - read_clock.Reset(&proc->clock_cache); - } + Free(clock); + Free(read_clock); } MetaMap::MetaMap() - : block_alloc_(LINKER_INITIALIZED, "heap block allocator"), - sync_alloc_(LINKER_INITIALIZED, "sync allocator") { - atomic_store(&uid_gen_, 0, memory_order_relaxed); -} + : block_alloc_("heap block allocator"), sync_alloc_("sync allocator") {} void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) { u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache); @@ -68,16 +56,16 @@ *meta = idx | kFlagBlock; } -uptr MetaMap::FreeBlock(Processor *proc, uptr p) { +uptr MetaMap::FreeBlock(Processor *proc, uptr p, bool reset) { MBlock* b = GetBlock(p); if (b == 0) return 0; uptr sz = RoundUpTo(b->siz, kMetaShadowCell); - FreeRange(proc, p, sz); + FreeRange(proc, p, sz, reset); return sz; } -bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) { +bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) { bool has_something = false; u32 *meta = MemToMeta(p); u32 *end = MemToMeta(p + sz); @@ -99,7 +87,8 @@ DCHECK(idx & kFlagSync); SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask); u32 next = s->next; - s->Reset(proc); + if (reset) + s->Reset(); sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask); idx = next; } else { @@ -116,30 +105,30 @@ // which can be huge. The function probes pages one-by-one until it finds a page // without meta objects, at this point it stops freeing meta objects. Because // thread stacks grow top-down, we do the same starting from end as well. -void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) { +void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) { if (SANITIZER_GO) { // UnmapOrDie/MmapFixedNoReserve does not work on Windows, // so we do the optimization only for C/C++. 
- FreeRange(proc, p, sz); + FreeRange(proc, p, sz, reset); return; } const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize; const uptr kPageSize = GetPageSizeCached() * kMetaRatio; if (sz <= 4 * kPageSize) { // If the range is small, just do the normal free procedure. - FreeRange(proc, p, sz); + FreeRange(proc, p, sz, reset); return; } // First, round both ends of the range to page size. uptr diff = RoundUp(p, kPageSize) - p; if (diff != 0) { - FreeRange(proc, p, diff); + FreeRange(proc, p, diff, reset); p += diff; sz -= diff; } diff = p + sz - RoundDown(p + sz, kPageSize); if (diff != 0) { - FreeRange(proc, p + sz - diff, diff); + FreeRange(proc, p + sz - diff, diff, reset); sz -= diff; } // Now we must have a non-empty page-aligned range. @@ -150,7 +139,7 @@ const uptr sz0 = sz; // Probe start of the range. for (uptr checked = 0; sz > 0; checked += kPageSize) { - bool has_something = FreeRange(proc, p, kPageSize); + bool has_something = FreeRange(proc, p, kPageSize, reset); p += kPageSize; sz -= kPageSize; if (!has_something && checked > (128 << 10)) @@ -158,7 +147,7 @@ } // Probe end of the range. for (uptr checked = 0; sz > 0; checked += kPageSize) { - bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize); + bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize, reset); sz -= kPageSize; // Stacks grow down, so sync object are most likely at the end of the region // (if it is a stack). The very end of the stack is TLS and tsan increases @@ -177,6 +166,27 @@ Die(); } +void MetaMap::ResetClocks() { + // This can be called from the background thread + // which does not have proc/cache. + // The cache is too large for stack. + static InternalAllocatorCache cache; + internal_memset(&cache, 0, sizeof(cache)); + internal_allocator()->InitCache(&cache); + sync_alloc_.ForEach([&](SyncVar *s) { + if (s->clock) { + InternalFree(s->clock, &cache); + s->clock = nullptr; + } + if (s->read_clock) { + InternalFree(s->read_clock, &cache); + s->read_clock = nullptr; + } + s->last_lock.Reset(); + }); + internal_allocator()->DestroyCache(&cache); +} + MBlock* MetaMap::GetBlock(uptr p) { u32 *meta = MemToMeta(p); u32 idx = *meta; @@ -193,6 +203,7 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create, bool save_stack) { + DCHECK(!create || thr->slot_locked); u32 *meta = MemToMeta(addr); u32 idx0 = *meta; u32 myidx = 0; @@ -203,7 +214,7 @@ SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask); if (LIKELY(s->addr == addr)) { if (UNLIKELY(myidx != 0)) { - mys->Reset(thr->proc()); + mys->Reset(); sync_alloc_.Free(&thr->proc()->sync_cache, myidx); } return s; @@ -218,10 +229,9 @@ } if (LIKELY(myidx == 0)) { - const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed); myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache); mys = sync_alloc_.Map(myidx); - mys->Init(thr, pc, addr, uid, save_stack); + mys->Init(thr, pc, addr, save_stack); } mys->next = idx0; if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0, diff --git a/compiler-rt/lib/tsan/rtl/tsan_trace.h b/compiler-rt/lib/tsan/rtl/tsan_trace.h --- a/compiler-rt/lib/tsan/rtl/tsan_trace.h +++ b/compiler-rt/lib/tsan/rtl/tsan_trace.h @@ -19,57 +19,6 @@ namespace __tsan { -const int kTracePartSizeBits = 13; -const int kTracePartSize = 1 << kTracePartSizeBits; -const int kTraceParts = 2 * 1024 * 1024 / kTracePartSize; -const int kTraceSize = kTracePartSize * kTraceParts; - -// Must fit into 3 bits. 
-enum EventType {
-  EventTypeMop,
-  EventTypeFuncEnter,
-  EventTypeFuncExit,
-  EventTypeLock,
-  EventTypeUnlock,
-  EventTypeRLock,
-  EventTypeRUnlock
-};
-
-// Represents a thread event (from most significant bit):
-//   u64 typ  : 3;   // EventType.
-//   u64 addr : 61;  // Associated pc.
-typedef u64 Event;
-
-const uptr kEventPCBits = 61;
-
-struct TraceHeader {
-#if !SANITIZER_GO
-  BufferedStackTrace stack0;  // Start stack for the trace.
-#else
-  VarSizeStackTrace stack0;
-#endif
-  u64 epoch0;  // Start epoch for the trace.
-  MutexSet mset0;
-
-  TraceHeader() : stack0(), epoch0() {}
-};
-
-struct Trace {
-  Mutex mtx;
-#if !SANITIZER_GO
-  // Must be last to catch overflow as paging fault.
-  // Go shadow stack is dynamically allocated.
-  uptr shadow_stack[kShadowStackSize];
-#endif
-  // Must be the last field, because we unmap the unused part in
-  // CreateThreadContext.
-  TraceHeader headers[kTraceParts];
-
-  Trace() : mtx(MutexTypeTrace) {}
-};
-
-namespace v3 {
-
 enum class EventType : u64 {
   kAccessExt,
   kAccessRange,
@@ -217,6 +166,7 @@
 struct TraceHeader {
   Trace* trace = nullptr;  // back-pointer to Trace containing this part
   INode trace_parts;       // in Trace::parts
+  INode global;            // in Context::trace_part_recycle
 };
 
 struct TracePart : TraceHeader {
@@ -239,13 +189,26 @@
 struct Trace {
   Mutex mtx;
   IList parts;
-  Event* final_pos =
-      nullptr;  // final position in the last part for finished threads
+  // First node non-queued into ctx->trace_part_recycle.
+  TracePart* local_head;
+  // Final position in the last part for finished threads.
+  Event* final_pos = nullptr;
+  // Number of trace parts allocated on behalf of this trace specifically.
+  // Total number of parts in this trace can be larger if we retake some
+  // parts from other traces.
+  uptr parts_allocated = 0;
 
   Trace() : mtx(MutexTypeTrace) {}
-};
-} // namespace v3
+  // We need at least 3 parts per thread, because we want to keep at least
+  // 2 parts per thread that are not queued into ctx->trace_part_recycle
+  // (the current one being filled and one full part that ensures that
+  // we always have at least one part worth of previous memory accesses).
+  static constexpr uptr kMinParts = 3;
+
+  static constexpr uptr kFinishedThreadLo = 16;
+  static constexpr uptr kFinishedThreadHi = 64;
+};
 
 } // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc b/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc
deleted file mode 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-// Body of the hottest inner loop.
-// If we wrap this body into a function, compilers (both gcc and clang)
-// produce sligtly less efficient code.
-//===----------------------------------------------------------------------===//
-do {
-  const unsigned kAccessSize = 1 << kAccessSizeLog;
-  u64 *sp = &shadow_mem[idx];
-  old = LoadShadow(sp);
-  if (LIKELY(old.IsZero())) {
-    if (!stored) {
-      StoreIfNotYetStored(sp, &store_word);
-      stored = true;
-    }
-    break;
-  }
-  // is the memory access equal to the previous?
- if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) { - // same thread? - if (LIKELY(Shadow::TidsAreEqual(old, cur))) { - if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) { - StoreIfNotYetStored(sp, &store_word); - stored = true; - } - break; - } - if (HappensBefore(old, thr)) { - if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) { - StoreIfNotYetStored(sp, &store_word); - stored = true; - } - break; - } - if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))) - break; - goto RACE; - } - // Do the memory access intersect? - if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) { - if (Shadow::TidsAreEqual(old, cur)) - break; - if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)) - break; - if (LIKELY(HappensBefore(old, thr))) - break; - goto RACE; - } - // The accesses do not intersect. - break; -} while (0); diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp --- a/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp @@ -98,7 +98,7 @@ EXPECT_EQ(f->memory_limit_mb, 666); EXPECT_EQ(f->stop_on_start, 0); EXPECT_EQ(f->running_on_valgrind, 0); - EXPECT_EQ(f->history_size, 5); + EXPECT_EQ(f->history_size, (uptr)5); EXPECT_EQ(f->io_sync, 1); EXPECT_EQ(f->die_after_fork, true); } @@ -122,7 +122,7 @@ EXPECT_EQ(f->memory_limit_mb, 456); EXPECT_EQ(f->stop_on_start, true); EXPECT_EQ(f->running_on_valgrind, true); - EXPECT_EQ(f->history_size, 6); + EXPECT_EQ(f->history_size, 6ul); EXPECT_EQ(f->io_sync, 2); EXPECT_EQ(f->die_after_fork, false); } diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp --- a/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp @@ -15,34 +15,70 @@ namespace __tsan { -TEST(Shadow, FastState) { - Shadow s(FastState(11, 22)); - EXPECT_EQ(s.tid(), (u64)11); - EXPECT_EQ(s.epoch(), (u64)22); - EXPECT_EQ(s.GetIgnoreBit(), false); - EXPECT_EQ(s.GetFreedAndReset(), false); - EXPECT_EQ(s.GetHistorySize(), 0); - EXPECT_EQ(s.addr0(), (u64)0); - EXPECT_EQ(s.size(), (u64)1); - EXPECT_EQ(s.IsWrite(), true); - - s.IncrementEpoch(); - EXPECT_EQ(s.epoch(), (u64)23); - s.IncrementEpoch(); - EXPECT_EQ(s.epoch(), (u64)24); - - s.SetIgnoreBit(); - EXPECT_EQ(s.GetIgnoreBit(), true); - s.ClearIgnoreBit(); - EXPECT_EQ(s.GetIgnoreBit(), false); - - for (int i = 0; i < 8; i++) { - s.SetHistorySize(i); - EXPECT_EQ(s.GetHistorySize(), i); - } - s.SetHistorySize(2); - s.ClearHistorySize(); - EXPECT_EQ(s.GetHistorySize(), 0); +void CheckShadow(const Shadow *s, Sid sid, Epoch epoch, uptr addr, uptr size, + AccessType typ) { + uptr addr1 = 0; + uptr size1 = 0; + AccessType typ1 = 0; + s->GetAccess(&addr1, &size1, &typ1); + CHECK_EQ(s->sid(), sid); + CHECK_EQ(s->epoch(), epoch); + CHECK_EQ(addr1, addr); + CHECK_EQ(size1, size); + CHECK_EQ(typ1, typ); +} + +TEST(Shadow, Shadow) { + Sid sid = static_cast(11); + Epoch epoch = static_cast(22); + FastState fs; + fs.SetSid(sid); + fs.SetEpoch(epoch); + CHECK_EQ(fs.sid(), sid); + CHECK_EQ(fs.epoch(), epoch); + CHECK_EQ(fs.GetIgnoreBit(), false); + fs.SetIgnoreBit(); + CHECK_EQ(fs.GetIgnoreBit(), true); + fs.ClearIgnoreBit(); + CHECK_EQ(fs.GetIgnoreBit(), false); + + Shadow s0(fs, 1, 2, kAccessWrite); + CheckShadow(&s0, sid, epoch, 1, 2, kAccessWrite); + Shadow s1(fs, 2, 3, kAccessRead); + CheckShadow(&s1, sid, epoch, 2, 3, kAccessRead); + Shadow s2(fs, 0xfffff8 + 4, 1, kAccessWrite | 
kAccessAtomic); + CheckShadow(&s2, sid, epoch, 4, 1, kAccessWrite | kAccessAtomic); + Shadow s3(fs, 0xfffff8 + 0, 8, kAccessRead | kAccessAtomic); + CheckShadow(&s3, sid, epoch, 0, 8, kAccessRead | kAccessAtomic); + + CHECK(!s0.IsBothReadsOrAtomic(kAccessRead | kAccessAtomic)); + CHECK(!s1.IsBothReadsOrAtomic(kAccessAtomic)); + CHECK(!s1.IsBothReadsOrAtomic(kAccessWrite)); + CHECK(s1.IsBothReadsOrAtomic(kAccessRead)); + CHECK(s2.IsBothReadsOrAtomic(kAccessAtomic)); + CHECK(!s2.IsBothReadsOrAtomic(kAccessWrite)); + CHECK(!s2.IsBothReadsOrAtomic(kAccessRead)); + CHECK(s3.IsBothReadsOrAtomic(kAccessAtomic)); + CHECK(!s3.IsBothReadsOrAtomic(kAccessWrite)); + CHECK(s3.IsBothReadsOrAtomic(kAccessRead)); + + CHECK(!s0.IsRWWeakerOrEqual(kAccessRead | kAccessAtomic)); + CHECK(s1.IsRWWeakerOrEqual(kAccessWrite)); + CHECK(s1.IsRWWeakerOrEqual(kAccessRead)); + CHECK(!s1.IsRWWeakerOrEqual(kAccessWrite | kAccessAtomic)); + + CHECK(!s2.IsRWWeakerOrEqual(kAccessRead | kAccessAtomic)); + CHECK(s2.IsRWWeakerOrEqual(kAccessWrite | kAccessAtomic)); + CHECK(s2.IsRWWeakerOrEqual(kAccessRead)); + CHECK(s2.IsRWWeakerOrEqual(kAccessWrite)); + + CHECK(s3.IsRWWeakerOrEqual(kAccessRead | kAccessAtomic)); + CHECK(s3.IsRWWeakerOrEqual(kAccessWrite | kAccessAtomic)); + CHECK(s3.IsRWWeakerOrEqual(kAccessRead)); + CHECK(s3.IsRWWeakerOrEqual(kAccessWrite)); + + Shadow sro(Shadow::kRodata); + CheckShadow(&sro, static_cast(0), kEpochZero, 0, 0, kAccessRead); } TEST(Shadow, Mapping) { diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp --- a/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp @@ -18,7 +18,7 @@ template static void TestStackTrace(StackTraceTy *trace) { - ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0); + ThreadState thr(kMainTid); ObtainCurrentStack(&thr, 0, trace); EXPECT_EQ(0U, trace->size); @@ -43,7 +43,7 @@ template static void TestTrim(StackTraceTy *trace) { - ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0); + ThreadState thr(kMainTid); for (uptr i = 0; i < 2 * kStackTraceMax; ++i) *thr.shadow_stack_pos++ = 100 + i; diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp --- a/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp @@ -17,6 +17,7 @@ TEST(MetaMap, Basic) { ThreadState *thr = cur_thread(); + SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[1] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64)); @@ -24,7 +25,7 @@ CHECK_NE(mb, (MBlock *)0); CHECK_EQ(mb->siz, 1 * sizeof(u64)); CHECK_EQ(mb->tid, thr->tid); - uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]); + uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0], true); CHECK_EQ(sz, 1 * sizeof(u64)); mb = m->GetBlock((uptr)&block[0]); CHECK_EQ(mb, (MBlock *)0); @@ -32,6 +33,7 @@ TEST(MetaMap, FreeRange) { ThreadState *thr = cur_thread(); + SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[4] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64)); @@ -40,7 +42,7 @@ CHECK_EQ(mb1->siz, 1 * sizeof(u64)); MBlock *mb2 = m->GetBlock((uptr)&block[1]); CHECK_EQ(mb2->siz, 3 * sizeof(u64)); - m->FreeRange(thr->proc(), (uptr)&block[0], 4 * sizeof(u64)); + m->FreeRange(thr->proc(), (uptr)&block[0], 4 * sizeof(u64), true); mb1 = m->GetBlock((uptr)&block[0]); CHECK_EQ(mb1, (MBlock *)0); mb2 = m->GetBlock((uptr)&block[1]); @@ -52,6 +54,7 @@ // them 
from detecting that we exit runtime with mutexes held. ScopedIgnoreInterceptors ignore; ThreadState *thr = cur_thread(); + SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[4] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 4 * sizeof(u64)); @@ -63,7 +66,7 @@ SyncVar *s2 = m->GetSyncOrCreate(thr, 0, (uptr)&block[1], false); CHECK_NE(s2, (SyncVar *)0); CHECK_EQ(s2->addr, (uptr)&block[1]); - m->FreeBlock(thr->proc(), (uptr)&block[0]); + m->FreeBlock(thr->proc(), (uptr)&block[0], true); s1 = m->GetSyncIfExists((uptr)&block[0]); CHECK_EQ(s1, (SyncVar *)0); s2 = m->GetSyncIfExists((uptr)&block[1]); @@ -74,6 +77,7 @@ TEST(MetaMap, MoveMemory) { ScopedIgnoreInterceptors ignore; ThreadState *thr = cur_thread(); + SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block1[4] = {}; // fake malloc block u64 block2[4] = {}; // fake malloc block @@ -102,18 +106,19 @@ s2 = m->GetSyncIfExists((uptr)&block2[1]); CHECK_NE(s2, (SyncVar *)0); CHECK_EQ(s2->addr, (uptr)&block2[1]); - m->FreeRange(thr->proc(), (uptr)&block2[0], 4 * sizeof(u64)); + m->FreeRange(thr->proc(), (uptr)&block2[0], 4 * sizeof(u64), true); } TEST(MetaMap, ResetSync) { ScopedIgnoreInterceptors ignore; ThreadState *thr = cur_thread(); + SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[1] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64)); SyncVar *s = m->GetSyncOrCreate(thr, 0, (uptr)&block[0], false); - s->Reset(thr->proc()); - uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]); + s->Reset(); + uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0], true); CHECK_EQ(sz, 1 * sizeof(u64)); } diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp --- a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp @@ -31,8 +31,6 @@ namespace __tsan { -using namespace v3; - // We need to run all trace tests in a new thread, // so that the thread trace is empty initially. template @@ -78,27 +76,30 @@ ThreadArray<1> thr; TraceFunc(thr, 0x1000); TraceFunc(thr, 0x1001); - TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001); + TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000); + TraceMutexLock(thr, EventType::kLock, 0x4001, 0x5001, 0x6001); TraceMutexUnlock(thr, 0x5000); TraceFunc(thr); CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead)); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002); + TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5002, 0x6002); TraceFunc(thr, 0x1002); CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead)); // This is the access we want to find. // The previous one is equivalent, but RestoreStack must prefer // the last of the matchig accesses. 
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); - Lock lock1(&ctx->slot_mtx); - ThreadRegistryLock lock2(&ctx->thread_registry); + SlotPairLocker locker(thr, thr->fast_state.sid()); + ThreadRegistryLock lock1(&ctx->thread_registry); + Lock lock2(&ctx->slot_mtx); + Tid tid = kInvalidTid; VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = - RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch, - 0x3000, 8, kAccessRead, &stk, &mset, &tag); + bool res = RestoreStack(EventType::kAccessExt, thr->fast_state.sid(), + thr->fast_state.epoch(), 0x3000, 8, kAccessRead, &tid, + &stk, &mset, &tag); CHECK(res); + CHECK_EQ(tid, thr->tid); CHECK_EQ(stk.size, 3); CHECK_EQ(stk.trace[0], 0x1000); CHECK_EQ(stk.trace[1], 0x1002); @@ -147,14 +148,17 @@ kAccessRead); break; } - Lock lock1(&ctx->slot_mtx); - ThreadRegistryLock lock2(&ctx->thread_registry); + SlotPairLocker locker(thr, thr->fast_state.sid()); + ThreadRegistryLock lock1(&ctx->thread_registry); + Lock lock2(&ctx->slot_mtx); + Tid tid = kInvalidTid; VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, - thr->epoch, 0x3000 + params.offset, params.size, - kAccessRead, &stk, &mset, &tag); + bool res = + RestoreStack(EventType::kAccessExt, thr->fast_state.sid(), + thr->fast_state.epoch(), 0x3000 + params.offset, + params.size, kAccessRead, &tid, &stk, &mset, &tag); CHECK_EQ(res, params.res); if (params.res) { CHECK_EQ(stk.size, 2); @@ -169,16 +173,19 @@ // Check of restoration of a mutex lock event. ThreadArray<1> thr; TraceFunc(thr, 0x1000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002); - Lock lock1(&ctx->slot_mtx); - ThreadRegistryLock lock2(&ctx->thread_registry); + TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000); + TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001); + TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5001, 0x6002); + SlotPairLocker locker(thr, thr->fast_state.sid()); + ThreadRegistryLock lock1(&ctx->thread_registry); + Lock lock2(&ctx->slot_mtx); + Tid tid = kInvalidTid; VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid, thr->epoch, - 0x5001, 0, 0, &stk, &mset, &tag); + bool res = RestoreStack(EventType::kLock, thr->fast_state.sid(), + thr->fast_state.epoch(), 0x5001, 0, 0, &tid, &stk, + &mset, &tag); CHECK(res); CHECK_EQ(stk.size, 2); CHECK_EQ(stk.trace[0], 0x1000); @@ -195,28 +202,35 @@ TRACE_TEST(Trace, MultiPart) { // Check replay of a trace with multiple parts. 
ThreadArray<1> thr; - TraceFunc(thr, 0x1000); - TraceFunc(thr, 0x2000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); - const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event); + FuncEntry(thr, 0x1000); + FuncEntry(thr, 0x2000); + MutexPreLock(thr, 0x4000, 0x5000, 0); + MutexPostLock(thr, 0x4000, 0x5000, 0); + MutexPreLock(thr, 0x4000, 0x5000, 0); + MutexPostLock(thr, 0x4000, 0x5000, 0); + const uptr kEvents = 3 * sizeof(TracePart) / sizeof(Event); for (uptr i = 0; i < kEvents; i++) { - TraceFunc(thr, 0x3000); - TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002); - TraceMutexUnlock(thr, 0x5002); - TraceFunc(thr); + FuncEntry(thr, 0x3000); + MutexPreLock(thr, 0x4002, 0x5002, 0); + MutexPostLock(thr, 0x4002, 0x5002, 0); + MutexUnlock(thr, 0x4003, 0x5002, 0); + FuncExit(thr); } - TraceFunc(thr, 0x4000); - TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); + FuncEntry(thr, 0x4000); + TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001); CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); - Lock lock1(&ctx->slot_mtx); - ThreadRegistryLock lock2(&ctx->thread_registry); + SlotPairLocker locker(thr, thr->fast_state.sid()); + ThreadRegistryLock lock1(&ctx->thread_registry); + Lock lock2(&ctx->slot_mtx); + Tid tid = kInvalidTid; VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = - RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch, - 0x3000, 8, kAccessRead, &stk, &mset, &tag); + bool res = RestoreStack(EventType::kAccessExt, thr->fast_state.sid(), + thr->fast_state.epoch(), 0x3000, 8, kAccessRead, &tid, + &stk, &mset, &tag); CHECK(res); + CHECK_EQ(tid, thr->tid); CHECK_EQ(stk.size, 4); CHECK_EQ(stk.trace[0], 0x1000); CHECK_EQ(stk.trace[1], 0x2000); @@ -224,11 +238,94 @@ CHECK_EQ(stk.trace[3], 0x2002); CHECK_EQ(mset.Size(), 2); CHECK_EQ(mset.Get(0).addr, 0x5000); - CHECK_EQ(mset.Get(0).stack_id, 0x6000); CHECK_EQ(mset.Get(0).write, true); + CHECK_EQ(mset.Get(0).count, 2); CHECK_EQ(mset.Get(1).addr, 0x5001); - CHECK_EQ(mset.Get(1).stack_id, 0x6001); CHECK_EQ(mset.Get(1).write, false); + CHECK_EQ(mset.Get(1).count, 1); +} + +void CheckTraceState(uptr count, uptr finished, uptr excess, uptr recycle) { + Lock l(&ctx->slot_mtx); + Printf("CheckTraceState(%zu/%zu, %zu/%zu, %zu/%zu, %zu/%zu)\n", + ctx->trace_part_total_allocated, count, + ctx->trace_part_recycle_finished, finished, + ctx->trace_part_finished_excess, excess, + ctx->trace_part_recycle.Size(), recycle); + CHECK_EQ(ctx->trace_part_total_allocated, count); + CHECK_EQ(ctx->trace_part_recycle_finished, finished); + CHECK_EQ(ctx->trace_part_finished_excess, excess); + CHECK_EQ(ctx->trace_part_recycle.Size(), recycle); +} + +TRACE_TEST(TraceAlloc, SingleThread) { + TraceResetForTesting(); + auto check_thread = [&](ThreadState *thr, uptr size, uptr count, + uptr finished, uptr excess, uptr recycle) { + CHECK_EQ(thr->tctx->trace.parts.Size(), size); + CheckTraceState(count, finished, excess, recycle); + }; + ThreadArray<2> threads; + check_thread(threads[0], 0, 0, 0, 0, 0); + TraceSwitchPartImpl(threads[0]); + check_thread(threads[0], 1, 1, 0, 0, 0); + TraceSwitchPartImpl(threads[0]); + check_thread(threads[0], 2, 2, 0, 0, 0); + TraceSwitchPartImpl(threads[0]); + check_thread(threads[0], 3, 3, 0, 0, 1); + TraceSwitchPartImpl(threads[0]); + check_thread(threads[0], 3, 3, 0, 0, 1); + threads.Finish(0); + CheckTraceState(3, 3, 0, 3); + threads.Finish(1); + CheckTraceState(3, 3, 0, 3); +} + +TRACE_TEST(TraceAlloc, 
FinishedThreadReuse) { + TraceResetForTesting(); + constexpr uptr Hi = Trace::kFinishedThreadHi; + constexpr uptr kThreads = 4 * Hi; + ThreadArray threads; + for (uptr i = 0; i < kThreads; i++) { + Printf("thread %zu\n", i); + TraceSwitchPartImpl(threads[i]); + if (i <= Hi) + CheckTraceState(i + 1, i, 0, i); + else if (i <= 2 * Hi) + CheckTraceState(Hi + 1, Hi, i - Hi, Hi); + else + CheckTraceState(Hi + 1, Hi, Hi, Hi); + threads.Finish(i); + if (i < Hi) + CheckTraceState(i + 1, i + 1, 0, i + 1); + else if (i < 2 * Hi) + CheckTraceState(Hi + 1, Hi + 1, i - Hi + 1, Hi + 1); + else + CheckTraceState(Hi + 1, Hi + 1, Hi + 1, Hi + 1); + } +} + +TRACE_TEST(TraceAlloc, FinishedThreadReuse2) { + TraceResetForTesting(); + // constexpr uptr Lo = Trace::kFinishedThreadLo; + // constexpr uptr Hi = Trace::kFinishedThreadHi; + constexpr uptr Min = Trace::kMinParts; + constexpr uptr kThreads = 10; + constexpr uptr kParts = 2 * Min; + ThreadArray threads; + for (uptr i = 0; i < kThreads; i++) { + Printf("thread %zu\n", i); + for (uptr j = 0; j < kParts; j++) TraceSwitchPartImpl(threads[i]); + if (i == 0) + CheckTraceState(Min, 0, 0, 1); + else + CheckTraceState(2 * Min, 0, Min, Min + 1); + threads.Finish(i); + if (i == 0) + CheckTraceState(Min, Min, 0, Min); + else + CheckTraceState(2 * Min, 2 * Min, Min, 2 * Min); + } } } // namespace __tsan diff --git a/compiler-rt/test/tsan/bench_threads.cpp b/compiler-rt/test/tsan/bench_threads.cpp --- a/compiler-rt/test/tsan/bench_threads.cpp +++ b/compiler-rt/test/tsan/bench_threads.cpp @@ -4,11 +4,6 @@ // bench.h needs pthread barriers which are not available on OS X // UNSUPPORTED: darwin -// aarch64 fails with: -// CHECK failed: tsan_rtl.cpp:327 "((addr + size)) <= ((TraceMemEnd()))" -// TODO: try to re-enable when D112603 is landed. 
-// XFAIL: aarch64 - #include "bench.h" void *nop_thread(void *arg) { diff --git a/compiler-rt/test/tsan/free_race2.c b/compiler-rt/test/tsan/free_race2.c --- a/compiler-rt/test/tsan/free_race2.c +++ b/compiler-rt/test/tsan/free_race2.c @@ -28,7 +28,7 @@ } // CHECK: WARNING: ThreadSanitizer: heap-use-after-free -// CHECK: Write of size 8 at {{.*}} by main thread: +// CHECK: Write of size {{.*}} at {{.*}} by main thread: // CHECK: #0 bar // CHECK: #1 main // CHECK: Previous write of size 8 at {{.*}} by main thread: diff --git a/compiler-rt/test/tsan/memcmp_race.cpp b/compiler-rt/test/tsan/memcmp_race.cpp --- a/compiler-rt/test/tsan/memcmp_race.cpp +++ b/compiler-rt/test/tsan/memcmp_race.cpp @@ -34,7 +34,7 @@ // CHECK: addr=[[ADDR:0x[0-9,a-f]+]] // CHECK: WARNING: ThreadSanitizer: data race -// CHECK: Write of size 1 at [[ADDR]] by thread T2: +// CHECK: Write of size 3 at [[ADDR]] by thread T2: // CHECK: #0 {{(memcpy|memmove)}} // CHECK: #{{[12]}} Thread2 // CHECK: Previous read of size 1 at [[ADDR]] by thread T1: diff --git a/compiler-rt/test/tsan/memcpy_race.cpp b/compiler-rt/test/tsan/memcpy_race.cpp --- a/compiler-rt/test/tsan/memcpy_race.cpp +++ b/compiler-rt/test/tsan/memcpy_race.cpp @@ -22,7 +22,8 @@ int main() { barrier_init(&barrier, 2); - print_address("addr=", 1, &data[5]); + print_address("addr1=", 1, &data[3]); + print_address("addr2=", 1, &data[5]); pthread_t t[2]; pthread_create(&t[0], NULL, Thread1, NULL); pthread_create(&t[1], NULL, Thread2, NULL); @@ -31,11 +32,12 @@ return 0; } -// CHECK: addr=[[ADDR:0x[0-9,a-f]+]] +// CHECK: addr1=[[ADDR1:0x[0-9,a-f]+]] +// CHECK: addr2=[[ADDR2:0x[0-9,a-f]+]] // CHECK: WARNING: ThreadSanitizer: data race -// CHECK: Write of size 1 at [[ADDR]] by thread T2: +// CHECK: Write of size 4 at [[ADDR1]] by thread T2: // CHECK: #0 {{(memcpy|memmove)}} // CHECK: #{{[12]}} Thread2 -// CHECK: Previous write of size 1 at [[ADDR]] by thread T1: +// CHECK: Previous write of size 1 at [[ADDR2]] by thread T1: // CHECK: #0 {{(memcpy|memmove)}} // CHECK: #{{[12]}} Thread1 diff --git a/compiler-rt/test/tsan/mutexset7.cpp b/compiler-rt/test/tsan/mutexset7.cpp --- a/compiler-rt/test/tsan/mutexset7.cpp +++ b/compiler-rt/test/tsan/mutexset7.cpp @@ -36,6 +36,6 @@ // CHECK: Write of size 4 at {{.*}} by thread T1: // CHECK: Previous write of size 4 at {{.*}} by thread T2 // CHECK: (mutexes: write [[M1:M[0-9]+]]): -// CHECK: Mutex [[M1]] is already destroyed -// CHECK-NOT: Mutex {{.*}} created at - +// CHECK: Mutex [[M1]] (0x{{.*}}) created at: +// CHECK: #0 pthread_mutex_init +// CHECK: #1 Thread2
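
For readers following the new shadow encoding above (see the Shadow constructor and Shadow::GetAccess in tsan_shadow.h): each 32-bit shadow cell records which bytes of its 8-byte granule were touched as an 8-bit mask, so checking whether two accesses to the same cell overlap reduces to ANDing their masks. The following is a minimal standalone sketch of that mask arithmetic; the helper names AccessMask and DecodeMask are illustrative assumptions, not code from this patch.

// Standalone sketch (illustration only, not part of the patch).
// It mirrors the mask expression in the Shadow constructor and the
// __builtin_ffs/__builtin_popcount decoding in Shadow::GetAccess.
#include <cassert>
#include <cstdint>

// Hypothetical helper: 8-bit mask of the bytes touched by an access of
// `size` bytes starting at offset (addr & 7) within an 8-byte shadow cell.
static uint8_t AccessMask(uintptr_t addr, uintptr_t size) {
  return static_cast<uint8_t>((((1u << size) - 1) << (addr & 0x7)) & 0xff);
}

// Hypothetical helper: recover (offset, size) from a contiguous mask,
// the way GetAccess does for non-free accesses.
static void DecodeMask(uint8_t mask, uintptr_t *addr, uintptr_t *size) {
  *addr = static_cast<uintptr_t>(__builtin_ffs(mask)) - 1;   // lowest set bit
  *size = static_cast<uintptr_t>(__builtin_popcount(mask));  // bytes touched
}

int main() {
  uint8_t w = AccessMask(3, 4);  // 4-byte write at offset 3 -> 0b01111000
  uint8_t r = AccessMask(2, 2);  // 2-byte read at offset 2  -> 0b00001100
  uint8_t x = AccessMask(7, 1);  // 1-byte access at offset 7 -> 0b10000000
  assert((w & r) != 0);  // the accesses share byte 3, so they intersect
  assert((w & x) == 0);  // disjoint byte ranges never intersect
  uintptr_t off = 0, sz = 0;
  DecodeMask(w, &off, &sz);
  assert(off == 3 && sz == 4);
  return 0;
}

Because same-cell overlap is a single AND of the two masks, the old 64-bit shadow helpers that compared addr0/size_log pairs (Addr0AndSizeAreEqual, TwoRangesIntersect) have no counterpart in the new encoding and are deleted above.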