diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -3834,6 +3834,7 @@ return result; } +#define PROXY_TASK_FLAG 0x40000000 /* The finish of the proxy tasks is divided in two pieces: - the top half is the one that can be done from a thread outside the team - the bottom half must be run from a thread within the team @@ -3863,7 +3864,7 @@ // Create an imaginary children for this task so the bottom half cannot // release the task before we have completed the second top half - KMP_ATOMIC_INC(&taskdata->td_incomplete_child_tasks); + KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG); } static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) { @@ -3875,7 +3876,7 @@ KMP_DEBUG_ASSERT(children >= 0); // Remove the imaginary children - KMP_ATOMIC_DEC(&taskdata->td_incomplete_child_tasks); + KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG); } static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) { @@ -3888,7 +3889,8 @@ // We need to wait to make sure the top half is finished // Spinning here should be ok as this should happen quickly - while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) > 0) + while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) & + PROXY_TASK_FLAG) > 0) ; __kmp_release_deps(gtid, taskdata); diff --git a/openmp/runtime/test/tasking/detach_nested_task.c b/openmp/runtime/test/tasking/detach_nested_task.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/tasking/detach_nested_task.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile-and-run + +// Checked gcc 10.1 still does not support detach clause on task construct. +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9, gcc-10 +// gcc 11 introduced detach clause, but gomp interface in libomp has no support +// XFAIL: gcc-11, gcc-12 +// clang supports detach clause since version 11. 
+// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7
+// icc compiler does not support detach clause.
+// UNSUPPORTED: icc
+
+// Each outer detachable task creates n child tasks chained by depend(out:ret).
+// The last child increments ret and then calls omp_fulfill_event to release
+// its outer task, so foo(n) is expected to return n * n.
+
+#include <omp.h>
+#include <stdio.h>
+
+// Spawns n detachable tasks, each creating n dependent children, and returns
+// the number of increments those children performed on the shared counter.
+int foo(int n)
+{
+  int ret = 0;
+  for (int i = 0; i < n; ++i) {
+    omp_event_handle_t event;
+    #pragma omp task detach(event) firstprivate(i,n) shared(ret) default(none)
+    {
+      for (int j = 0; j < n; ++j) {
+        #pragma omp task firstprivate(event,i,j,n) shared(ret) default(none) depend(out:ret)
+        {
+          // depend(out:ret) only orders siblings; atomic covers the other i.
+          #pragma omp atomic
+            ret++;
+#if _OPENMP
+          if (j == n-1) {
+            // Last child of the chain: let the detached outer task complete.
+            omp_fulfill_event(event);
+          }
+#endif
+        }
+      }
+    }
+  }
+  // the taskwait only guarantees the outer tasks to complete.
+  #pragma omp taskwait
+
+  return ret;
+}
+
+// Runs foo(8) on the master thread; exits non-zero unless it returned 64.
+int main(void)
+{
+  int ret = 0;
+#pragma omp parallel
+#pragma omp master
+  {
+    ret = foo(8);
+  }
+  printf("%i\n", ret);
+  //CHECK: 64
+  return ret == 64 ? 0 : 1;
+}