Index: libomptarget/src/interface.cpp =================================================================== --- libomptarget/src/interface.cpp +++ libomptarget/src/interface.cpp @@ -128,7 +128,7 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, arg_types); @@ -181,7 +181,7 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, arg_types); @@ -214,7 +214,7 @@ int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, arg_types); @@ -255,7 +255,7 @@ int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, arg_types); @@ -298,7 +298,7 @@ int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, team_num, thread_limit); Index: libomptarget/test/offloading/target_depend_nowait.cpp 
===================================================================
--- /dev/null
+++ libomptarget/test/offloading/target_depend_nowait.cpp
@@ -0,0 +1,68 @@
+// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu
+
+// Exercises the depend + nowait forms of the target constructs: one thread
+// of a two-thread parallel region issues enter data, target, target teams,
+// update and exit data in sequence, each with depend(out : A[0]) nowait.
+// The final sum check expects every element of A and B to have been
+// incremented exactly twice.
+
+#include <omp.h>
+#include <stdio.h>
+
+#define N 1024
+
+int A[N];
+int B[N];
+int C[N];
+int main() {
+  for (int i = 0; i < N; i++)
+    A[i] = B[i] = i;
+
+#pragma omp parallel num_threads(2)
+  {
+    // Only thread 1 issues the target constructs.
+    if (omp_get_thread_num() == 1) {
+// map data A & B and move to the device
+#pragma omp target enter data map(to : A, B) depend(out : A[0]) nowait
+
+// no data move since already mapped
+#pragma omp target map(A, B) depend(out : A[0]) nowait
+      {
+        for (int i = 0; i < N; i++)
+          ++A[i];
+        for (int i = 0; i < N; i++)
+          ++B[i];
+      }
+
+// no data move since already mapped
+#pragma omp target teams num_teams(1) map(A, B) depend(out : A[0]) nowait
+      {
+        for (int i = 0; i < N; i++)
+          ++A[i];
+        for (int i = 0; i < N; i++)
+          ++B[i];
+      }
+
+// A updated via update
+#pragma omp target update from(A) depend(out : A[0]) nowait
+
+// B updated via exit, A just released
+#pragma omp target exit data map(release \
+    : A) map(from \
+    : B) depend(out \
+    : A[0]) nowait
+    } // if
+  } // parallel
+
+  int Sum = 0;
+  for (int i = 0; i < N; i++)
+    Sum += A[i] + B[i];
+  // Sum is 2 * N * (2 + N - 1 + 2) / 2
+  // CHECK: Sum = 1051648.
+  printf("Sum = %d.\n", Sum);
+
+  return Sum != 2 * N * (2 + N - 1 + 2) / 2;
+}
+