Index: openmp/trunk/runtime/src/kmp.h
===================================================================
--- openmp/trunk/runtime/src/kmp.h
+++ openmp/trunk/runtime/src/kmp.h
@@ -2121,6 +2121,7 @@
 extern kmp_tasking_mode_t
     __kmp_tasking_mode; /* determines how/when to execute tasks */
 extern int __kmp_task_stealing_constraint;
+extern int __kmp_enable_task_throttling; // Set via KMP_ENABLE_TASK_THROTTLING
 #if OMP_40_ENABLED
 extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
 // specified, defaults to 0 otherwise
Index: openmp/trunk/runtime/src/kmp_global.cpp
===================================================================
--- openmp/trunk/runtime/src/kmp_global.cpp
+++ openmp/trunk/runtime/src/kmp_global.cpp
@@ -341,6 +341,7 @@
 KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);
 
 int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
+int __kmp_enable_task_throttling = 1; /* Throttle tasks by default */
 
 #ifdef DEBUG_SUSPEND
 int __kmp_suspend_count = 0;
Index: openmp/trunk/runtime/src/kmp_settings.cpp
===================================================================
--- openmp/trunk/runtime/src/kmp_settings.cpp
+++ openmp/trunk/runtime/src/kmp_settings.cpp
@@ -4683,6 +4683,20 @@
 #endif /* USE_ITT_BUILD */
 
 // -----------------------------------------------------------------------------
+// KMP_ENABLE_TASK_THROTTLING (boolean, stored in __kmp_enable_task_throttling)
+
+static void __kmp_stg_parse_task_throttling(char const *name,
+                                            char const *value, void *data) {
+  __kmp_stg_parse_bool(name, value, &__kmp_enable_task_throttling);
+} // __kmp_stg_parse_task_throttling (data is unused)
+
+
+static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
+                                            char const *name, void *data) {
+  __kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling);
+} // __kmp_stg_print_task_throttling (data is unused)
+
+// -----------------------------------------------------------------------------
 // OMP_DISPLAY_ENV
 
 #if OMP_40_ENABLED
@@ -5003,6 +5017,8 @@
     {"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,
      __kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0},
#endif + {"KMP_ENABLE_TASK_THROTTLING", __kmp_stg_parse_task_throttling, + __kmp_stg_print_task_throttling, NULL, 0, 0}, #if OMP_40_ENABLED {"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, Index: openmp/trunk/runtime/src/kmp_tasking.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_tasking.cpp +++ openmp/trunk/runtime/src/kmp_tasking.cpp @@ -374,7 +374,8 @@ // Check if deque is full if (TCR_4(thread_data->td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td)) { - if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, + if (__kmp_enable_task_throttling && + __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, thread->th.th_current_task)) { KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning " "TASK_NOT_PUSHED for task %p\n", @@ -394,7 +395,8 @@ // Need to recheck as we can get a proxy task from thread outside of OpenMP if (TCR_4(thread_data->td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td)) { - if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, + if (__kmp_enable_task_throttling && + __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, thread->th.th_current_task)) { __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; " Index: openmp/trunk/runtime/test/tasking/omp_fill_taskqueue.c =================================================================== --- openmp/trunk/runtime/test/tasking/omp_fill_taskqueue.c +++ openmp/trunk/runtime/test/tasking/omp_fill_taskqueue.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=0 %libomp-run +// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=1 %libomp-run + +#include +#include +#include + +/** + * Test the task throttling behavior of the runtime. 
+ * Unless OMP_NUM_THREADS is 1, the master thread pushes tasks to its own task
+ * queue until either of the following happens:
+ *   - the task queue is full, and it starts serializing tasks
+ *   - all tasks have been pushed, and it can begin execution
+ * The idea is to create a huge number of tasks whose execution is blocked
+ * until the master thread comes to execute tasks (they need to be blocking,
+ * otherwise the second thread will start emptying the queue).
+ * At this point we can check the number of enqueued tasks: iff all tasks have
+ * been enqueued, then there was no task throttling.
+ * Otherwise there has been some sort of task throttling.
+ * If what we detect doesn't match the value of the environment variable, the
+ * test fails.
+ */
+
+#define NUM_TASKS 2000
+
+int main(void)
+{
+  int block = 1; // release flag; accessed only via omp atomic read/write
+  const char *env = getenv("KMP_ENABLE_TASK_THROTTLING");
+  int throttling = env == NULL || strcmp(env, "1") == 0; // unset: default on
+  int enqueued = 0;
+  int failed = -1;
+
+  #pragma omp parallel num_threads(2)
+  #pragma omp master
+  {
+    for (int i = 0; i < NUM_TASKS; i++) {
+      enqueued++;
+      #pragma omp task
+      {
+        if (omp_get_thread_num() == 0) {
+          // As soon as the master thread starts executing task we should unlock
+          // all tasks, and detect the test failure if it has not been done yet.
+          if (failed < 0)
+            failed = throttling ? enqueued == NUM_TASKS : enqueued < NUM_TASKS;
+          #pragma omp atomic write
+          block = 0;
+        }
+        for (int wait = 1; wait;) { // spin until the master thread clears block
+          #pragma omp atomic read
+          wait = block;
+        }
+      }
+    }
+    #pragma omp atomic write
+    block = 0;
+  }
+
+  return failed;
+}