Do not hold the free/live thread list lock longer than necessary.
This change speeds up the following benchmark 10x.
// Benchmark shape parameters.
constexpr int kTopThreads{50};      // threads started by main(), each running Thread()
constexpr int kChildThreads{20};    // no-op threads started per round inside Thread()
constexpr int kChildIterations{8};  // rounds executed by each Thread() call
// Body of one top-level benchmark thread: runs kChildIterations rounds, and in
// each round starts kChildThreads threads with an empty body, then joins all
// of them before the next round begins.
void Thread() {
  for (int round = 0; round < kChildIterations; ++round) {
    std::vector<std::thread> children;
    for (int n = 0; n < kChildThreads; ++n) {
      children.emplace_back([] {});
    }
    for (std::thread& child : children) {
      child.join();
    }
  }
}
int main() {
std::vector<std::thread> threads; for (int i = 0; i < kTopThreads; ++i) threads.emplace_back(Thread); for (auto& t : threads) t.join();
}
Unrelated to this patch, but maybe free_list_ and live_list_ could each have their own mutex.