Index: libomptarget/deviceRTLs/nvptx/src/sync.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -62,6 +62,9 @@
         // Barrier #1 is for synchronization among active threads.
         named_sync(L1_BARRIER, threads);
       }
+    } else {
+      // Still need to flush the memory per the standard.
+      __kmpc_flush(loc_ref);
     } // numberOfActiveOMPThreads > 1
     PRINT0(LD_SYNC, "completed kmpc_barrier\n");
   }
Index: libomptarget/deviceRTLs/nvptx/test/parallel/barrier.c
===================================================================
--- /dev/null
+++ libomptarget/deviceRTLs/nvptx/test/parallel/barrier.c
@@ -0,0 +1,39 @@
+// RUN: %compile-run-and-check
+
+// Two teams of one thread each (thread_limit(1), num_threads(1)): team 0
+// stores to `data`, then to `flag`, with a barrier after each store; team 1
+// polls `flag`, executing a barrier before and inside the poll loop.
+// Presumably every barrier here takes the single-thread path of __kmpc_barrier.
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+  int data, flag = 0;
+#pragma omp target teams num_teams(2) map(tofrom : data, flag) thread_limit(1)
+#pragma omp parallel num_threads(1)
+  {
+    if (omp_get_team_num() == 0) {
+      /* Write to the data buffer that will be read by thread */
+      data = 42;
+/* Flush data to thread 1 and strictly order the write to data
+   relative to the write to the flag */
+#pragma omp barrier
+      /* Set flag to release thread 1 */
+      flag = 1;
+/* Flush flag to ensure that thread 1 sees the change */
+#pragma omp barrier
+    } else if (omp_get_team_num() == 1) {
+/* Loop until we see the update to the flag */
+#pragma omp barrier
+      while (flag < 1) {
+#pragma omp barrier
+      }
+#pragma omp barrier
+    }
+  }
+  // CHECK: data=42.
+  /* Value of data will be 42, value of flag still undefined */
+  printf("data=%d.\n", data);
+  return 0;
+}