diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -854,6 +854,12 @@
   if (TCR_PTR(__kmp_threads[0]) == NULL) {
     --capacity;
   }
+  // Unless we are initializing the hidden helper team, take
+  // __kmp_hidden_helper_threads_num out of the capacity: those slots are
+  // included in __kmp_threads_capacity but reserved for hidden helper threads.
+  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+    capacity -= __kmp_hidden_helper_threads_num;
+  }
   if (__kmp_nth + new_nthreads -
           (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
       capacity) {
@@ -3607,6 +3613,13 @@
     --capacity;
   }
 
+  // Unless we are initializing the hidden helper team, take
+  // __kmp_hidden_helper_threads_num out of the capacity: those slots are
+  // included in __kmp_threads_capacity but reserved for hidden helper threads.
+  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+    capacity -= __kmp_hidden_helper_threads_num;
+  }
+
   /* see if there are too many threads */
   if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
     if (__kmp_tp_cached) {
@@ -3639,7 +3652,7 @@
   /* find an available thread slot */
   // Don't reassign the zero slot since we need that to only be used by
   // initial thread. Slots for hidden helper threads should also be skipped.
-  if (initial_thread && __kmp_threads[0] == NULL) {
+  if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
     gtid = 0;
   } else {
     for (gtid = __kmp_hidden_helper_threads_num + 1;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -504,9 +504,10 @@
     nth = (4 * __kmp_xproc);
 
   // If hidden helper task is enabled, we initialize the thread capacity with
-  // extra
-  // __kmp_hidden_helper_threads_num.
-  nth += __kmp_hidden_helper_threads_num;
+  // extra __kmp_hidden_helper_threads_num.
+  if (__kmp_enable_hidden_helper) {
+    nth += __kmp_hidden_helper_threads_num;
+  }
 
   if (nth > __kmp_max_nth)
     nth = __kmp_max_nth;
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
@@ -0,0 +1,48 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <limits>
+#include <thread>
+#include <vector>
+
+// Entry point of an extra thread that touches the OpenMP runtime and then
+// stays alive for one second.
+void dummy_root() {
+  // omp_get_max_threads() will do middle initialization
+  int nthreads = omp_get_max_threads();
+  std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+}
+
+int main(int argc, char *argv[]) {
+  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                                  4 * omp_get_num_procs()),
+                         std::numeric_limits<int>::max());
+
+  std::vector<int> data(N);
+
+  // Create a new thread to initialize the OpenMP RTL. The new thread will not
+  // be taken as the "initial thread".
+  std::thread root(dummy_root);
+
+#pragma omp parallel for num_threads(N)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] = i;
+  }
+
+#pragma omp parallel for num_threads(N + 1)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] += i;
+  }
+
+  for (unsigned i = 0; i < N; ++i) {
+    assert(data[i] == 2 * i);
+  }
+
+  root.join();
+
+  return 0;
+}
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
@@ -0,0 +1,34 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <vector>
+
+// Run one parallel loop requesting N threads and one requesting N + 1 threads,
+// then check that every element was updated by both loops.
+int main(int argc, char *argv[]) {
+  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                                  4 * omp_get_num_procs()),
+                         std::numeric_limits<int>::max());
+
+  std::vector<int> data(N);
+
+#pragma omp parallel for num_threads(N)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] = i;
+  }
+
+#pragma omp parallel for num_threads(N + 1)
+  for (unsigned i = 0; i < N; ++i) {
+    data[i] += i;
+  }
+
+  for (unsigned i = 0; i < N; ++i) {
+    assert(data[i] == 2 * i);
+  }
+
+  return 0;
+}