Add the KMP_ENABLE_TASK_THROTTLING setting (boolean, enabled by default).

The value is stored in the new global __kmp_enable_task_throttling. The two
"deque is full" checks in kmp_tasking.cpp (traced as __kmp_push_task) now take
their TASK_NOT_PUSHED path only when the setting is enabled and
__kmp_task_is_allowed() accepts the task; with the setting disabled that path
is skipped.

Index: runtime/src/kmp.h
===================================================================
--- runtime/src/kmp.h
+++ runtime/src/kmp.h
@@ -2119,6 +2119,7 @@
 extern kmp_tasking_mode_t
     __kmp_tasking_mode; /* determines how/when to execute tasks */
 extern int __kmp_task_stealing_constraint;
+extern int __kmp_enable_task_throttling;
 #if OMP_40_ENABLED
 extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
 // specified, defaults to 0 otherwise
Index: runtime/src/kmp_global.cpp
===================================================================
--- runtime/src/kmp_global.cpp
+++ runtime/src/kmp_global.cpp
@@ -341,6 +341,7 @@
 KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);
 
 int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
+int __kmp_enable_task_throttling = 1; /* Task throttling on by default */
 
 #ifdef DEBUG_SUSPEND
 int __kmp_suspend_count = 0;
Index: runtime/src/kmp_settings.cpp
===================================================================
--- runtime/src/kmp_settings.cpp
+++ runtime/src/kmp_settings.cpp
@@ -4682,6 +4682,19 @@
 } // __kmp_stg_print_forkjoin_frames
 #endif /* USE_ITT_BUILD */
 
+// -----------------------------------------------------------------------------
+// KMP_ENABLE_TASK_THROTTLING
+
+static void __kmp_stg_parse_task_throttling(char const *name,
+                                            char const *value, void *data) {
+  __kmp_stg_parse_bool(name, value, &__kmp_enable_task_throttling);
+} // __kmp_stg_parse_task_throttling
+
+static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
+                                            char const *name, void *data) {
+  __kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling);
+} // __kmp_stg_print_task_throttling
+
 // -----------------------------------------------------------------------------
 // OMP_DISPLAY_ENV
 
@@ -5003,6 +5016,8 @@
     {"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,
      __kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0},
 #endif
+    {"KMP_ENABLE_TASK_THROTTLING", __kmp_stg_parse_task_throttling,
+     __kmp_stg_print_task_throttling, NULL, 0, 0},
 
 #if OMP_40_ENABLED
     {"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env,
Index: runtime/src/kmp_tasking.cpp
===================================================================
--- runtime/src/kmp_tasking.cpp
+++ runtime/src/kmp_tasking.cpp
@@ -374,7 +374,8 @@
   // Check if deque is full
   if (TCR_4(thread_data->td.td_deque_ntasks) >=
       TASK_DEQUE_SIZE(thread_data->td)) {
-    if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
+    if (__kmp_enable_task_throttling &&
+        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                               thread->th.th_current_task)) {
       KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                     "TASK_NOT_PUSHED for task %p\n",
@@ -394,7 +395,8 @@
     // Need to recheck as we can get a proxy task from thread outside of OpenMP
     if (TCR_4(thread_data->td.td_deque_ntasks) >=
         TASK_DEQUE_SIZE(thread_data->td)) {
-      if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
+      if (__kmp_enable_task_throttling &&
+          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                 thread->th.th_current_task)) {
         __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
         KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "