diff --git a/openmp/libomptarget/test/offloading/target_nowait_target.cpp b/openmp/libomptarget/test/offloading/target_nowait_target.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/offloading/target_nowait_target.cpp @@ -0,0 +1,31 @@ +// RUN: %libomptarget-compilexx-and-run-generic + +// UNSUPPORTED: amdgcn-amd-amdhsa + +#include <cassert> + +int main(int argc, char *argv[]) { + int data[1024]; + int sum = 0; + + for (int i = 0; i < 1024; ++i) + data[i] = i; + +#pragma omp target map(tofrom: sum) map(to: data) depend(inout : data[0]) nowait + { + for (int i = 0; i < 1024; ++i) { + sum += data[i]; + } + } + +#pragma omp target map(tofrom: sum) map(to: data) depend(inout : data[0]) + { + for (int i = 0; i < 1024; ++i) { + sum += data[i]; + } + } + + assert(sum == 1023 * 1024); + + return 0; +} diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -2037,8 +2037,10 @@ } #endif - KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == - TRUE); + KMP_DEBUG_ASSERT( + this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE || + this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered == + TRUE); __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); __kmp_task_team_setup(this_thr, team, 0); diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -531,7 +531,8 @@ kmp_task_team_t *task_team = this_thr->th.th_task_team; // we need to wait for the proxy tasks before finishing the thread - if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) + if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || + task_team->tt.tt_hidden_helper_task_encountered)) __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); KMP_MB(); diff --git a/openmp/runtime/src/kmp_runtime.cpp 
b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -4106,7 +4106,8 @@ kmp_task_team_t *task_team = thread->th.th_task_team; // we need to wait for the proxy tasks before finishing the thread - if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { + if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks || + task_team->tt.tt_hidden_helper_task_encountered)) { #if OMPT_SUPPORT // the runtime is shutting down so we won't report any events thread->th.ompt_thread_info.state = ompt_state_undefined; diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -829,8 +829,10 @@ bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; - ignore = ignore && thread->th.th_task_team != NULL && - thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; + ignore = + ignore && thread->th.th_task_team != NULL && + thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE && + thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE; ignore = ignore || current_task->td_dephash == NULL; if (ignore) { diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -3077,7 +3077,7 @@ // We could be getting tasks from target constructs; if this is the only // thread, keep trying to execute tasks from own queue if (nthreads == 1 && - KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks)) + KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) > 1) use_own_tasks = 1; else { KA_TRACE(15, @@ -3478,6 +3478,7 @@ TCW_4(task_team->tt.tt_found_tasks, FALSE); TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); + TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE); task_team->tt.tt_nproc = nthreads = 
team->t.t_nproc; KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads); @@ -3640,6 +3641,7 @@ TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); TCW_4(task_team->tt.tt_found_tasks, FALSE); TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); + TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE); KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, team->t.t_nproc); TCW_4(task_team->tt.tt_active, TRUE); @@ -3732,8 +3734,10 @@ "setting active to false, setting local and team's pointer to NULL\n", __kmp_gtid_from_thread(this_thr), task_team)); KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 || - task_team->tt.tt_found_proxy_tasks == TRUE); + task_team->tt.tt_found_proxy_tasks == TRUE || + task_team->tt.tt_hidden_helper_task_encountered == TRUE); TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE); + TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE); KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0); TCW_SYNC_4(task_team->tt.tt_active, FALSE); KMP_MB();