Do not hold the free/live thread list lock longer than necessary.
This change speeds up the following benchmark 10x.
// Benchmark sizing knobs:
//   kTopThreads      - number of top-level threads started by main().
//   kChildThreads    - number of no-op child threads spawned per batch.
//   kChildIterations - number of spawn/join batches each top-level thread runs.
constexpr int kTopThreads{50};
constexpr int kChildThreads{20};
constexpr int kChildIterations{8};
// Body of each top-level benchmark thread: for kChildIterations rounds, spawn
// kChildThreads threads that do nothing, then join every one of them before
// starting the next round.
void Thread() {
  for (int round = 0; round < kChildIterations; ++round) {
    std::vector<std::thread> children;
    for (int spawned = 0; spawned < kChildThreads; ++spawned) {
      children.emplace_back([] {});
    }
    for (std::thread& child : children) {
      child.join();
    }
  }
}
int main() {
std::vector<std::thread> threads; for (int i = 0; i < kTopThreads; ++i) threads.emplace_back(Thread); for (auto& t : threads) t.join();
}
Unrelated to this patch, but maybe free_list_ and live_list_ could each have their own mutex.