This is an archive of the discontinued LLVM Phabricator instance.

Differential D100348

[hwasan] Fix lock contention on thread creation.
ClosedPublic

Authored by eugenis on Apr 12 2021, 3:22 PM.

Download Raw Diff

Details

Reviewers

vitalybuka

Commits

rG51aa61e74bdb: [hwasan] Fix lock contention on thread creation.

Summary

Do not hold the free/live thread list lock longer than necessary.
This change speeds up the following benchmark 10x.

constexpr int kTopThreads = 50;
constexpr int kChildThreads = 20;
constexpr int kChildIterations = 8;

void Thread() {
  for (int i = 0; i < kChildIterations; ++i) {
    std::vector<std::thread> threads;
    for (int i = 0; i < kChildThreads; ++i)
      threads.emplace_back([](){});
    for (auto& t : threads)
      t.join();
  }
}

int main() {
  std::vector<std::thread> threads;
  for (int i = 0; i < kTopThreads; ++i)
    threads.emplace_back(Thread);
  for (auto& t : threads)
    t.join();
}

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

eugenis requested review of this revision.Apr 12 2021, 3:22 PM

eugenis created this revision.

Herald added a project: Restricted Project. · View Herald TranscriptApr 12 2021, 3:22 PM

Herald added a subscriber: Restricted Project. · View Herald Transcript

Harbormaster completed remote builds in B98373: Diff 336979.Apr 12 2021, 4:12 PM

vitalybuka accepted this revision.Apr 14 2021, 12:37 PM

vitalybuka added inline comments.

compiler-rt/lib/hwasan/hwasan_thread_list.h
92	unrelated to the patch but maybe free_list_ and live_list_ could have each own mutex
137	{} redundant in this case

This revision is now accepted and ready to land.Apr 14 2021, 12:37 PM

addressed comments

compiler-rt/lib/hwasan/hwasan_thread_list.h
92	I've actually tried this and could not see a measurable perf improvement.

Harbormaster completed remote builds in B98740: Diff 337524.Apr 14 2021, 1:28 PM

vitalybuka added inline comments.Apr 14 2021, 3:01 PM

compiler-rt/lib/hwasan/hwasan_thread_list.h
92	To my taste it would be cleaner that way: different resources rely on different mutexes. This bug will less likely to happen in the first place :)

split locks for live and free lists

vitalybuka accepted this revision.Apr 14 2021, 3:56 PM

Harbormaster completed remote builds in B98778: Diff 337575.Apr 14 2021, 5:07 PM

Closed by commit rG51aa61e74bdb: [hwasan] Fix lock contention on thread creation. (authored by eugenis). · Explain WhyApr 14 2021, 5:14 PM

This revision was automatically updated to reflect the committed changes.

eugenis added a commit: rG51aa61e74bdb: [hwasan] Fix lock contention on thread creation..

eugenis mentioned this in D101881: [hwasan] Fix missing synchronization in AllocThread..May 4 2021, 7:51 PM

eugenis mentioned this in rG18959a6a094c: [hwasan] Fix missing synchronization in AllocThread..May 5 2021, 11:58 AM

Revision Contents

Path

Size

compiler-rt/

lib/

hwasan/

hwasan_thread_list.h

27 lines

Diff 337585

compiler-rt/lib/hwasan/hwasan_thread_list.h

Show First 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	HwasanThreadList(uptr storage, uptr size)
// * a ring buffer at offset 0,		// * a ring buffer at offset 0,
// * a Thread object at offset ring_buffer_size_.		// * a Thread object at offset ring_buffer_size_.
ring_buffer_size_ = RingBufferSize();		ring_buffer_size_ = RingBufferSize();
thread_alloc_size_ =		thread_alloc_size_ =
RoundUpTo(ring_buffer_size_ + sizeof(Thread), ring_buffer_size_ * 2);		RoundUpTo(ring_buffer_size_ + sizeof(Thread), ring_buffer_size_ * 2);
}		}

Thread *CreateCurrentThread() {		Thread *CreateCurrentThread() {
Thread *t;		Thread *t = nullptr;
{		{
SpinMutexLock l(&list_mutex_);		SpinMutexLock l(&free_list_mutex_);
if (!free_list_.empty()) {		if (!free_list_.empty()) {
		vitalybukaUnsubmitted Not Done Reply Inline Actions unrelated to the patch but maybe free_list_ and live_list_ could have each own mutex vitalybuka: unrelated to the patch but maybe free_list_ and live_list_ could have each own mutex
		eugenisAuthorUnsubmitted Done Reply Inline Actions I've actually tried this and could not see a measurable perf improvement. eugenis: I've actually tried this and could not see a measurable perf improvement.
		vitalybukaUnsubmitted Not Done Reply Inline Actions To my taste it would be cleaner that way: different resources rely on different mutexes. This bug will less likely to happen in the first place :) vitalybuka: To my taste it would be cleaner that way: different resources rely on different mutexes. This…
t = free_list_.back();		t = free_list_.back();
free_list_.pop_back();		free_list_.pop_back();
		}
		}
		if (t) {
uptr start = (uptr)t - ring_buffer_size_;		uptr start = (uptr)t - ring_buffer_size_;
internal_memset((void *)start, 0, ring_buffer_size_ + sizeof(Thread));		internal_memset((void *)start, 0, ring_buffer_size_ + sizeof(Thread));
} else {		} else {
t = AllocThread();		t = AllocThread();
}		}
		{
		SpinMutexLock l(&live_list_mutex_);
live_list_.push_back(t);		live_list_.push_back(t);
}		}
t->Init((uptr)t - ring_buffer_size_, ring_buffer_size_);		t->Init((uptr)t - ring_buffer_size_, ring_buffer_size_);
AddThreadStats(t);		AddThreadStats(t);
return t;		return t;
}		}

void DontNeedThread(Thread *t) {		void DontNeedThread(Thread *t) {
uptr start = (uptr)t - ring_buffer_size_;		uptr start = (uptr)t - ring_buffer_size_;
ReleaseMemoryPagesToOS(start, start + thread_alloc_size_);		ReleaseMemoryPagesToOS(start, start + thread_alloc_size_);
}		}

void RemoveThreadFromLiveList(Thread *t) {		void RemoveThreadFromLiveList(Thread *t) {
		SpinMutexLock l(&live_list_mutex_);
for (Thread *&t2 : live_list_)		for (Thread *&t2 : live_list_)
if (t2 == t) {		if (t2 == t) {
// To remove t2, copy the last element of the list in t2's position, and		// To remove t2, copy the last element of the list in t2's position, and
// pop_back(). This works even if t2 is itself the last element.		// pop_back(). This works even if t2 is itself the last element.
t2 = live_list_.back();		t2 = live_list_.back();
live_list_.pop_back();		live_list_.pop_back();
return;		return;
}		}
CHECK(0 && "thread not found in live list");		CHECK(0 && "thread not found in live list");
}		}

void ReleaseThread(Thread *t) {		void ReleaseThread(Thread *t) {
RemoveThreadStats(t);		RemoveThreadStats(t);
t->Destroy();		t->Destroy();
SpinMutexLock l(&list_mutex_);		DontNeedThread(t);
RemoveThreadFromLiveList(t);		RemoveThreadFromLiveList(t);
		SpinMutexLock l(&free_list_mutex_);
free_list_.push_back(t);		free_list_.push_back(t);
DontNeedThread(t);
}		}
		vitalybukaUnsubmitted Done Reply Inline Actions {} redundant in this case vitalybuka: {} redundant in this case

Thread *GetThreadByBufferAddress(uptr p) {		Thread *GetThreadByBufferAddress(uptr p) {
return (Thread *)(RoundDownTo(p, ring_buffer_size_ * 2) +		return (Thread *)(RoundDownTo(p, ring_buffer_size_ * 2) +
ring_buffer_size_);		ring_buffer_size_);
}		}

uptr MemoryUsedPerThread() {		uptr MemoryUsedPerThread() {
uptr res = sizeof(Thread) + ring_buffer_size_;		uptr res = sizeof(Thread) + ring_buffer_size_;
if (auto sz = flags()->heap_history_size)		if (auto sz = flags()->heap_history_size)
res += HeapAllocationsRingBuffer::SizeInBytes(sz);		res += HeapAllocationsRingBuffer::SizeInBytes(sz);
return res;		return res;
}		}

template <class CB>		template <class CB>
void VisitAllLiveThreads(CB cb) {		void VisitAllLiveThreads(CB cb) {
SpinMutexLock l(&list_mutex_);		SpinMutexLock l(&live_list_mutex_);
for (Thread *t : live_list_) cb(t);		for (Thread *t : live_list_) cb(t);
}		}

void AddThreadStats(Thread *t) {		void AddThreadStats(Thread *t) {
SpinMutexLock l(&stats_mutex_);		SpinMutexLock l(&stats_mutex_);
stats_.n_live_threads++;		stats_.n_live_threads++;
stats_.total_stack_size += t->stack_size();		stats_.total_stack_size += t->stack_size();
}		}
Show All 19 Lines	Thread *AllocThread() {
return t;		return t;
}		}

uptr free_space_;		uptr free_space_;
uptr free_space_end_;		uptr free_space_end_;
uptr ring_buffer_size_;		uptr ring_buffer_size_;
uptr thread_alloc_size_;		uptr thread_alloc_size_;

		SpinMutex free_list_mutex_;
InternalMmapVector<Thread *> free_list_;		InternalMmapVector<Thread *> free_list_;
		SpinMutex live_list_mutex_;
InternalMmapVector<Thread *> live_list_;		InternalMmapVector<Thread *> live_list_;
SpinMutex list_mutex_;

ThreadStats stats_;		ThreadStats stats_;
SpinMutex stats_mutex_;		SpinMutex stats_mutex_;
};		};

void InitThreadList(uptr storage, uptr size);		void InitThreadList(uptr storage, uptr size);
HwasanThreadList &hwasanThreadList();		HwasanThreadList &hwasanThreadList();

} // namespace		} // namespace