This is an archive of the discontinued LLVM Phabricator instance.

tsan: optimize sync clock memory consumption
ClosedPublic

Authored by dvyukov on Jul 12 2017, 12:55 PM.

Details

Reviewers
kcc
alekseyshl
Summary

This change implements two optimizations of sync clocks that reduce memory consumption:

  1. Use previously unused first level block space to store clock elements.

Currently a clock for 100 threads consumes three 512-byte blocks:

  • 2 second level blocks storing the 64-bit clock elements themselves
  • 1 first level block storing 32-bit indices into the second level blocks

Only 8 bytes of the first level block are actually used (two 32-bit indices).
With this change such a clock consumes only 2 blocks.

  2. Share similar clocks differing only by a single clock entry for the current thread.

When a thread does several release operations on fresh sync objects without
intervening acquire operations (e.g. initialization of several fields in a ctor),
the resulting clocks differ only in the entry for the current thread.
This change reuses a single clock for such release operations. The current
thread's time (which is different for each clock) is stored in dirty entries.
Both optimizations are sketched below.
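
A simplified C++ sketch of the two ideas (names, field widths, and layout
are illustrative only, not the actual tsan data structures):

  #include <cstdint>

  typedef uint32_t u32;
  typedef uint64_t u64;

  const uintptr_t kBlockSize = 512;

  struct ClockElem {
    u64 epoch;  // one thread's position in the vector clock
  };

  // Optimization 1: the first level block is a union. The table of
  // 32-bit indices to second level blocks occupies only a few entries,
  // so the rest of the same 512-byte block can store clock elements
  // directly, saving a block for small and medium-sized clocks.
  struct ClockBlock {
    static const uintptr_t kTableSize = kBlockSize / sizeof(u32);        // 128
    static const uintptr_t kClockCount = kBlockSize / sizeof(ClockElem); // 64
    union {
      u32 table[kTableSize];         // indices of second level blocks
      ClockElem clock[kClockCount];  // inline clock elements
    };
  };

  // Optimization 2: several sync objects released in a row without
  // intervening acquires can share one refcounted clock. The only part
  // that differs between them, the releasing thread's own time, is kept
  // in small per-object "dirty" entries instead of a full clock copy.
  struct Dirty {
    u64 tid;    // thread whose entry is overridden
    u64 epoch;  // that thread's time for this sync object
  };

  struct SyncClock {
    u32 tab_idx;     // index of the shared, refcounted first level block
    Dirty dirty[2];  // overrides layered on top of the shared clock
  };

In this scheme a reader consults the dirty entries first and falls back
to the shared clock for every other thread.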

We are experiencing issues with a large program that eats all 64M clock blocks
(32GB of non-flushable memory) and crashes with a dense allocator overflow.
The maximum number of threads in the program is ~170, which is currently a
particularly unfortunate count (each clock consumes 4 blocks). At the moment
the program crashes after consuming 60+ GB of memory.
The first optimization brings clock block consumption down to ~40M blocks and
allows the program to work. The second optimization further reduces block
consumption to a "modest" 16M blocks (~8GB of RAM) and reduces overall RAM
consumption to ~30GB.

Measurements on another real-world C++ RPC benchmark show an RSS reduction
from 3.491GB to 3.186GB and a modest speedup of ~5%.

A Go parallel client/server HTTP benchmark:
https://github.com/golang/benchmarks/blob/master/http/http.go
shows an RSS reduction from 320MB to 240MB and a few percent speedup.

Event Timeline

dvyukov created this revision. Jul 12 2017, 12:55 PM
alekseyshl accepted this revision. Jul 13 2017, 1:30 PM
alekseyshl added inline comments.
lib/tsan/rtl/tsan_clock.cc
247

but, for simplicity, we currently

263

For my education, why does increasing the refcount need a less strict memory order than decreasing it?

341

threads -> thread

445–514

dirst -> dirty

lib/tsan/rtl/tsan_clock.h
34

Maybe name them get_test_only and get_clean_test_only then?

115

Cachable -> Cacheable

lib/tsan/rtl/tsan_defs.h
102

Curious why the invalid tid has more bits than kTidBits but fewer bits than the tid field in the Dirty struct (22)?

lib/tsan/tests/unit/tsan_clock_test.cc
59

ARRAY_SIZE

This revision is now accepted and ready to land. Jul 13 2017, 1:30 PM
dvyukov updated this revision to Diff 106618. Jul 14 2017, 4:28 AM
dvyukov marked 6 inline comments as done.
dvyukov added inline comments.
lib/tsan/rtl/tsan_clock.cc
263

Because with traditional reference counting a thread can acquire a reference to an object only if it already owns one. So the increment does not actually acquire anything new, and acquire is not a synchronization operation there. The release operation, in contrast, does release the current thread's ownership of the object and is a synchronization operation.
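
A minimal sketch of this classic pattern (illustrative only, not the
actual tsan_clock.cc code):

  #include <atomic>

  struct RefCounted {
    std::atomic<int> refs{1};

    void Ref() {
      // The caller already owns a reference, so the increment does not
      // need to synchronize with anything: relaxed is enough.
      refs.fetch_add(1, std::memory_order_relaxed);
    }

    void Unref() {
      // The decrement gives up this thread's ownership, so it must be
      // a release. The thread that drops the last reference must also
      // acquire, to see all writes made by the other former owners
      // before it destroys the (heap-allocated) object.
      if (refs.fetch_sub(1, std::memory_order_release) == 1) {
        std::atomic_thread_fence(std::memory_order_acquire);
        delete this;
      }
    }
  };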

lib/tsan/rtl/tsan_defs.h
102

The current version does not work because we now put it into a bitfield with 22 bits (64 - kClkBits), so:

  1. we get warnings about constant truncation;
  2. the constant gets truncated and then no longer compares equal to kInvalidTid after extraction (the field is unsigned, so it is not sign-extended).

0xffff solves both problems.
I changed it to kMaxTid + 1; it's less magical this way.
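
A minimal example of the truncation problem (identifiers and the old
constant value are illustrative assumptions, not quoted from the patch):

  #include <cstdio>

  // Suppose the invalid tid used to be (unsigned)-1 = 0xffffffff,
  // which does not fit in a 22-bit field.
  const unsigned kInvalidTid = static_cast<unsigned>(-1);

  struct Dirty {
    unsigned tid : 22;  // 64 - kClkBits in the real code
  };

  int main() {
    Dirty d;
    d.tid = kInvalidTid;  // warning: constant truncated to 0x3fffff
    // The field is unsigned, so reading it back yields 0x003fffff with
    // no sign extension, and the comparison with 0xffffffff fails.
    printf("%d\n", d.tid == kInvalidTid);  // prints 0
    return 0;
  }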

dvyukov closed this revision. Jul 14 2017, 4:30 AM

Submitted as 308018.