Index: runtime/src/i18n/en_US.txt
===================================================================
--- runtime/src/i18n/en_US.txt
+++ runtime/src/i18n/en_US.txt
@@ -433,7 +433,7 @@
 OBSOLETE "Check NLSPATH environment variable, its value is \"%1$s\"."
 ChangeStackLimit "Please try changing the shell stack limit or adjusting the "
                  "OMP_STACKSIZE environment variable."
-Unset_ALL_THREADS "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set)."
+Unset_ALL_THREADS "Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS) and OMP_THREAD_LIMIT (if either is set)."
 Set_ALL_THREADPRIVATE "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d."
 PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads."
 DuplicateLibrary "This means that multiple copies of the OpenMP runtime have been "
Index: runtime/src/kmp.h
===================================================================
--- runtime/src/kmp.h
+++ runtime/src/kmp.h
@@ -2689,6 +2689,7 @@
   kmp_lock_t r_begin_lock;
   volatile int r_begin;
   int r_blocktime; /* blocktime for this root and descendants */
+  int r_cg_nthreads; // count of active threads in a contention group
 } kmp_base_root_t;
 
 typedef union KMP_ALIGN_CACHE kmp_root {
@@ -2863,8 +2864,10 @@
 extern int __kmp_avail_proc; /* number of processors available to the process */
 extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
 extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
-extern int
-    __kmp_max_nth; /* maximum total number of concurrently-existing threads */
+// maximum total number of concurrently-existing threads on device
+extern int __kmp_max_nth;
+// maximum total number of concurrently-existing threads in a contention group
+extern int __kmp_cg_max_nth;
 extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
                                       __kmp_root */
 extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
Index: runtime/src/kmp_ftn_entry.h
===================================================================
--- runtime/src/kmp_ftn_entry.h
+++ runtime/src/kmp_ftn_entry.h
@@ -550,7 +550,7 @@
     __kmp_serial_initialize();
   };
   /* global ICV */
-  return __kmp_max_nth;
+  return __kmp_cg_max_nth;
 #endif
 }
 
Index: runtime/src/kmp_global.cpp
===================================================================
--- runtime/src/kmp_global.cpp
+++ runtime/src/kmp_global.cpp
@@ -135,6 +135,7 @@
 size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
 int __kmp_sys_max_nth = KMP_MAX_NTH;
 int __kmp_max_nth = 0;
+int __kmp_cg_max_nth = 0;
 int __kmp_threads_capacity = 0;
 int __kmp_dflt_team_nth = 0;
 int __kmp_dflt_team_nth_ub = 0;
Index: runtime/src/kmp_runtime.cpp
===================================================================
--- runtime/src/kmp_runtime.cpp
+++ runtime/src/kmp_runtime.cpp
@@ -881,7 +881,7 @@
     KMP_ASSERT(0);
   }
 
-  // Respect KMP_ALL_THREADS, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT.
+  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
   if (__kmp_nth + new_nthreads -
           (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
       __kmp_max_nth) {
@@ -899,12 +899,41 @@
                 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
     }
     if (tl_nthreads == 1) {
-      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
-                    "reservation to 1 thread\n",
+      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
+                    "reduced reservation to 1 thread\n",
                     master_tid));
       return 1;
     }
-    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
+    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
+                  "reservation to %d threads\n",
+                  master_tid, tl_nthreads));
+    new_nthreads = tl_nthreads;
+  }
+
+  // Respect OMP_THREAD_LIMIT
+  if (root->r.r_cg_nthreads + new_nthreads -
+          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
+      __kmp_cg_max_nth) {
+    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
+                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
+    if (tl_nthreads <= 0) {
+      tl_nthreads = 1;
+    }
+
+    // If dyn-var is false, emit a 1-time warning.
+    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
+      __kmp_reserve_warn = 1;
+      __kmp_msg(kmp_ms_warning,
+                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
+                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+    }
+    if (tl_nthreads == 1) {
+      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
+                    "reduced reservation to 1 thread\n",
+                    master_tid));
+      return 1;
+    }
+    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                   "reservation to %d threads\n",
                   master_tid, tl_nthreads));
     new_nthreads = tl_nthreads;
@@ -3116,6 +3145,7 @@
   root->r.r_in_parallel = 0;
   root->r.r_blocktime = __kmp_dflt_blocktime;
   root->r.r_nested = __kmp_dflt_nested;
+  root->r.r_cg_nthreads = 1;
 
   /* setup the root team for this task */
   /* allocate the root team structure */
@@ -3508,7 +3538,7 @@
 
   // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
   // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
-  // user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become
+  // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
   // > __kmp_max_nth in one of two ways:
   //
   // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
@@ -3889,6 +3919,8 @@
   TCW_4(__kmp_nth,
         __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
 
+  root->r.r_cg_nthreads--;
+
   __kmp_reap_thread(root->r.r_uber_thread, 1);
 
   // We canot put root thread to __kmp_thread_pool, so we have to reap it istead
@@ -4169,6 +4201,7 @@
     KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
 
     TCW_4(__kmp_nth, __kmp_nth + 1);
+    root->r.r_cg_nthreads++;
 
     new_thr->th.th_task_state = 0;
     new_thr->th.th_task_state_top = 0;
@@ -4316,6 +4349,8 @@
   __kmp_all_nth++;
   __kmp_nth++;
 
+  root->r.r_cg_nthreads++;
+
   // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
   // numbers of procs, and method #2 (keyed API call) for higher numbers.
   if (__kmp_adjust_gtid_mode) {
@@ -5378,6 +5413,7 @@
 void __kmp_free_thread(kmp_info_t *this_th) {
   int gtid;
   kmp_info_t **scan;
+  kmp_root_t *root = this_th->th.th_root;
 
   KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
@@ -5436,6 +5472,7 @@
   __kmp_thread_pool_nth++;
 
   TCW_4(__kmp_nth, __kmp_nth - 1);
+  root->r.r_cg_nthreads--;
 
 #ifdef KMP_ADJUST_BLOCKTIME
   /* Adjust blocktime back to user setting or default if necessary */
@@ -6375,6 +6412,7 @@
     __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
   }
   __kmp_max_nth = __kmp_sys_max_nth;
+  __kmp_cg_max_nth = __kmp_sys_max_nth;
 
   // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
   // part
@@ -6977,7 +7015,7 @@
     if (num_teams * num_threads > __kmp_max_nth) {
       int new_threads = __kmp_max_nth / num_teams;
       if (!__kmp_reserve_warn) { // user asked for too many threads
-        __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
+        __kmp_reserve_warn = 1; // that conflicts with KMP_DEVICE_THREAD_LIMIT
         __kmp_msg(kmp_ms_warning,
                   KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                   KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
Index: runtime/src/kmp_settings.cpp
===================================================================
--- runtime/src/kmp_settings.cpp
+++ runtime/src/kmp_settings.cpp
@@ -569,7 +569,7 @@
 // Parse and print functions.
 
 // -----------------------------------------------------------------------------
-// KMP_ALL_THREADS, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT
+// KMP_DEVICE_THREAD_LIMIT, KMP_ALL_THREADS
 
 static void __kmp_stg_parse_device_thread_limit(char const *name,
                                                 char const *value, void *data) {
@@ -599,6 +599,20 @@
 } // __kmp_stg_print_device_thread_limit
 
 // -----------------------------------------------------------------------------
+// OMP_THREAD_LIMIT
+static void __kmp_stg_parse_thread_limit(char const *name, char const *value,
+                                         void *data) {
+  __kmp_stg_parse_int(name, value, 1, __kmp_sys_max_nth, &__kmp_cg_max_nth);
+  K_DIAG(1, ("__kmp_cg_max_nth == %d\n", __kmp_cg_max_nth));
+
+} // __kmp_stg_parse_thread_limit
+
+static void __kmp_stg_print_thread_limit(kmp_str_buf_t *buffer,
+                                         char const *name, void *data) {
+  __kmp_stg_print_int(buffer, name, __kmp_cg_max_nth);
+} // __kmp_stg_print_thread_limit
+
+// -----------------------------------------------------------------------------
 // KMP_BLOCKTIME
 
 static void __kmp_stg_parse_blocktime(char const *name, char const *value,
@@ -4386,8 +4400,8 @@
     {"KMP_TASKLOOP_MIN_TASKS", __kmp_stg_parse_taskloop_min_tasks,
      __kmp_stg_print_taskloop_min_tasks, NULL, 0, 0},
 #endif
-    {"OMP_THREAD_LIMIT", __kmp_stg_parse_device_thread_limit,
-     __kmp_stg_print_device_thread_limit, NULL, 0, 0},
+    {"OMP_THREAD_LIMIT", __kmp_stg_parse_thread_limit,
+     __kmp_stg_print_thread_limit, NULL, 0, 0},
     {"OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy,
      __kmp_stg_print_wait_policy, NULL, 0, 0},
     {"KMP_DISP_NUM_BUFFERS", __kmp_stg_parse_disp_buffers,
@@ -4687,27 +4701,22 @@
     }; // if
   }
 
-  { // Initialize KMP_DEVICE_THREAD_LIMIT, KMP_ALL_THREADS, and
-    // OMP_THREAD_LIMIT data.
+  { // Initialize KMP_DEVICE_THREAD_LIMIT and KMP_ALL_THREADS
     kmp_setting_t *kmp_device_thread_limit =
         __kmp_stg_find("KMP_DEVICE_THREAD_LIMIT"); // 1st priority.
     kmp_setting_t *kmp_all_threads =
         __kmp_stg_find("KMP_ALL_THREADS"); // 2nd priority.
-    kmp_setting_t *omp_thread_limit =
-        __kmp_stg_find("OMP_THREAD_LIMIT"); // 3rd priority.
 
     // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
-    static kmp_setting_t *volatile rivals[4];
+    static kmp_setting_t *volatile rivals[3];
     int i = 0;
 
     rivals[i++] = kmp_device_thread_limit;
     rivals[i++] = kmp_all_threads;
-    rivals[i++] = omp_thread_limit;
     rivals[i++] = NULL;
 
     kmp_device_thread_limit->data = CCAST(kmp_setting_t **, rivals);
     kmp_all_threads->data = CCAST(kmp_setting_t **, rivals);
-    omp_thread_limit->data = CCAST(kmp_setting_t **, rivals);
   }
 
 #if KMP_AFFINITY_SUPPORTED
Index: runtime/test/env/omp_thread_limit.c
===================================================================
--- runtime/test/env/omp_thread_limit.c
+++ runtime/test/env/omp_thread_limit.c
@@ -0,0 +1,82 @@
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=4 %libomp-run 4
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=7 %libomp-run 7
+//
+// OMP_THREAD_LIMIT=N should imply that no more than N threads are active in
+// a contention group
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include "omp_testsuite.h"
+
+int failed = 0;
+
+void usage() {
+    fprintf(stderr, "usage: omp_thread_limit <n>\n");
+}
+
+void verify(const char* file_name, int line_number, int team_size) {
+    int num_threads = omp_get_num_threads();
+    if (team_size != num_threads) {
+#pragma omp critical(A)
+        {
+            char label[256];
+            snprintf(label, sizeof(label), "%s:%d", file_name, line_number);
+            failed = 1;
+            printf("failed: %s: team_size(%d) != omp_get_num_threads(%d)\n",
+                   label, team_size, num_threads);
+        }
+    }
+}
+
+int main(int argc, char** argv)
+{
+    int cl_thread_limit;
+
+    if (argc != 2) {
+        usage();
+        return 1;
+    }
+    cl_thread_limit = atoi(argv[1]);
+
+    omp_set_dynamic(0);
+    if (omp_get_thread_limit() != cl_thread_limit) {
+        fprintf(stderr, "omp_get_thread_limit failed with %d, should be%d\n",
+                omp_get_thread_limit(), cl_thread_limit);
+        return 1;
+    }
+    else if (omp_get_max_threads() > cl_thread_limit) {
+#if _OPENMP
+        int team_size = cl_thread_limit;
+#else
+        int team_size = 1;
+#endif
+        omp_set_num_threads(19);
+        verify(__FILE__, __LINE__, 1);
+#pragma omp parallel
+        {
+            verify(__FILE__, __LINE__, team_size);
+            verify(__FILE__, __LINE__, team_size);
+        }
+        verify(__FILE__, __LINE__, 1);
+
+        omp_set_nested(1);
+#pragma omp parallel num_threads(3)
+        {
+            verify(__FILE__, __LINE__, 3);
+#pragma omp master
+#pragma omp parallel num_threads(21)
+            {
+                verify(__FILE__, __LINE__, team_size-2);
+                verify(__FILE__, __LINE__, team_size-2);
+            }
+        }
+        verify(__FILE__, __LINE__, 1);
+
+        return failed;
+    } else {
+        fprintf(stderr, "This test is not applicable for max num_threads='%d'\n",
+                omp_get_max_threads());
+        return 0;
+    }
+
+}