Index: lib/tsan/rtl/tsan_interface_atomic.cc
===================================================================
--- lib/tsan/rtl/tsan_interface_atomic.cc
+++ lib/tsan/rtl/tsan_interface_atomic.cc
@@ -474,7 +474,7 @@
 #define SCOPED_ATOMIC(func, ...) \
     ThreadState *const thr = cur_thread(); \
-    if (thr->ignore_sync || thr->ignore_interceptors) { \
+    if (UNLIKELY(thr->ignore_sync || thr->ignore_interceptors)) { \
       ProcessPendingSignals(thr); \
       return NoTsanAtomic##func(__VA_ARGS__); \
     } \
Index: lib/tsan/rtl/tsan_rtl.cc
===================================================================
--- lib/tsan/rtl/tsan_rtl.cc
+++ lib/tsan/rtl/tsan_rtl.cc
@@ -641,6 +641,7 @@
   // __m128i _mm_move_epi64(__m128i*);
   // _mm_storel_epi64(u64*, __m128i);
   u64 store_word = cur.raw();
+  bool stored = false;
 
   // scan all the shadow values and dispatch to 4 categories:
   // same, replace, candidate and race (see comments below).
@@ -665,16 +666,28 @@
   int idx = 0;
 #include "tsan_update_shadow_word_inl.h"
   idx = 1;
+  if (stored) {
 #include "tsan_update_shadow_word_inl.h"
+  } else {
+#include "tsan_update_shadow_word_inl.h"
+  }
   idx = 2;
+  if (stored) {
+#include "tsan_update_shadow_word_inl.h"
+  } else {
 #include "tsan_update_shadow_word_inl.h"
+  }
   idx = 3;
+  if (stored) {
+#include "tsan_update_shadow_word_inl.h"
+  } else {
 #include "tsan_update_shadow_word_inl.h"
+  }
 #endif
 
   // we did not find any races and had already stored
   // the current access info, so we are done
-  if (LIKELY(store_word == 0))
+  if (LIKELY(stored))
     return;
   // choose a random candidate slot and replace it
   StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
@@ -814,7 +827,7 @@
   }
 #endif
 
-  if (!SANITIZER_GO && *shadow_mem == kShadowRodata) {
+  if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
     // Access to .rodata section, no races here.
     // Measurements show that it can be 10-20% of all memory accesses.
     StatInc(thr, StatMop);
@@ -825,7 +838,7 @@
   }
 
   FastState fast_state = thr->fast_state;
-  if (fast_state.GetIgnoreBit()) {
+  if (UNLIKELY(fast_state.GetIgnoreBit())) {
     StatInc(thr, StatMop);
     StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
     StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
Index: lib/tsan/rtl/tsan_update_shadow_word_inl.h
===================================================================
--- lib/tsan/rtl/tsan_update_shadow_word_inl.h
+++ lib/tsan/rtl/tsan_update_shadow_word_inl.h
@@ -17,31 +17,35 @@
   const unsigned kAccessSize = 1 << kAccessSizeLog;
   u64 *sp = &shadow_mem[idx];
   old = LoadShadow(sp);
-  if (old.IsZero()) {
+  if (LIKELY(old.IsZero())) {
     StatInc(thr, StatShadowZero);
-    if (store_word)
+    if (!stored) {
       StoreIfNotYetStored(sp, &store_word);
-    // The above StoreIfNotYetStored could be done unconditionally
-    // and it even shows 4% gain on synthetic benchmarks (r4307).
+      stored = true;
+    }
     break;
   }
   // is the memory access equal to the previous?
-  if (Shadow::Addr0AndSizeAreEqual(cur, old)) {
+  if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) {
     StatInc(thr, StatShadowSameSize);
     // same thread?
-    if (Shadow::TidsAreEqual(old, cur)) {
+    if (LIKELY(Shadow::TidsAreEqual(old, cur))) {
       StatInc(thr, StatShadowSameThread);
-      if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))
+      if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) {
         StoreIfNotYetStored(sp, &store_word);
+        stored = true;
+      }
       break;
     }
     StatInc(thr, StatShadowAnotherThread);
     if (HappensBefore(old, thr)) {
-      if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))
+      if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) {
         StoreIfNotYetStored(sp, &store_word);
+        stored = true;
+      }
       break;
     }
-    if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
+    if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)))
       break;
     goto RACE;
   }
@@ -55,7 +59,7 @@
     StatInc(thr, StatShadowAnotherThread);
     if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
       break;
-    if (HappensBefore(old, thr))
+    if (LIKELY(HappensBefore(old, thr)))
       break;
     goto RACE;
   }
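For context on the new `stored` flag and the duplicated `#include` lines: the patch replaces the old sentinel convention (`store_word == 0` meaning "already stored") with an explicit `bool stored`, and wraps each repeated textual inclusion of `tsan_update_shadow_word_inl.h` in `if (stored) { ... } else { ... }`. Within each branch the compiler knows the value of `stored`, so the `if (!stored)` test inside the header folds away in the specialized copies. Below is a minimal standalone sketch of that pattern, not TSan code: `UPDATE_SLOT` stands in for the textual `#include`, and all names are illustrative.

// stored_flag_sketch.cc -- illustrative sketch of the specialization trick.
#include <cstdint>

#define UPDATE_SLOT(idx_)                                          \
  do {                                                             \
    uint64_t old = shadow[idx_];                                   \
    if (old == 0) {                                                \
      if (!stored) { /* dead code when `stored` is known true */   \
        shadow[idx_] = cur;                                        \
        stored = true;                                             \
      }                                                            \
      break;                                                       \
    }                                                              \
    /* same/replace/candidate/race dispatch would follow here */   \
  } while (0)

bool Update(uint64_t *shadow, uint64_t cur) {
  bool stored = false;
  UPDATE_SLOT(0);
  // Branching on `stored` before the next copy gives the compiler a
  // known value of the flag in each branch, so the inner `if (!stored)`
  // and the flag update disappear from the specialized copy -- the same
  // effect the patch gets from the if/else around the repeated #include.
  if (stored) {
    UPDATE_SLOT(1);
  } else {
    UPDATE_SLOT(1);
  }
  return stored;  // replaces the old `store_word == 0` sentinel test
}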
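On the `LIKELY`/`UNLIKELY` annotations added throughout: in sanitizer_common these are thin wrappers over `__builtin_expect` (defined in sanitizer_internal_defs.h along the lines shown below). The sketch that follows is a standalone illustration, not TSan code; `ThreadStateLike` and `SlowPath` are stand-in names.

// likely_sketch.cc -- illustrative use of branch-weight hints.
#define LIKELY(x)   __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)

struct ThreadStateLike {  // stand-in for tsan's ThreadState
  bool ignore_sync;
  bool ignore_interceptors;
};

static int SlowPath() { return -1; }  // hypothetical cold handler

int Access(ThreadStateLike *thr) {
  // Marking the ignore path cold keeps the common case on the
  // straight-line fall-through path, which is what the UNLIKELY
  // added to SCOPED_ATOMIC and GetIgnoreBit() is after.
  if (UNLIKELY(thr->ignore_sync || thr->ignore_interceptors))
    return SlowPath();
  return 0;  // hot path
}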