Index: openmp/libomptarget/DeviceRTL/src/Kernel.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -34,6 +34,20 @@ uint32_t TId = mapping::getThreadIdInBlock(); + // Stop now if this thread can never be an active worker thread. This is + // important at least for the nvptx implementation in the case of the Pascal + // microarchitecture. In that case, a single thread can apparently satisfy a + // barrier on the behalf of all threads in the same warp. Thus, it would not + // be safe for other threads in the master thread's warp to reach the + // synchronize::threads call below before the master thread reaches its + // corresponding synchronize::threads call: all active worker threads could + // then proceed to see WorkFn=0 and immediately quit without doing any work. + // The nvptx implementation ensures mapping::getBlockSize does not include any + // of the master thread's warp, so none of its threads can ever be active + // worker threads anyway. + if (TId >= mapping::getBlockSize()) + return; + do { ParallelRegionFnTy WorkFn = 0; Index: openmp/libomptarget/deviceRTLs/common/src/omptarget.cu =================================================================== --- openmp/libomptarget/deviceRTLs/common/src/omptarget.cu +++ openmp/libomptarget/deviceRTLs/common/src/omptarget.cu @@ -179,8 +179,22 @@ EXTERN bool __kmpc_kernel_parallel(void**WorkFn); static void __kmpc_target_region_state_machine(ident_t *Ident) { - int TId = __kmpc_get_hardware_thread_id_in_block(); + + // Stop now if this thread can never be an active worker thread. This is + // important at least for the nvptx implementation in the case of the Pascal + // microarchitecture. In that case, a single thread can apparently satisfy a + // barrier on the behalf of all threads in the same warp. Thus, it would not + // be safe for other threads in the master thread's warp to reach the + // synchronize::threads call below before the master thread reaches its + // corresponding synchronize::threads call: all active worker threads could + // then proceed to see WorkFn=0 and immediately quit without doing any work. + // This implementation ensures GetNumberOfWorkersInTeam() does not include any + // of the master thread's warp, so none of its threads can ever be active + // worker threads anyway. + if (TId >= GetNumberOfWorkersInTeam()) + return; + do { void* WorkFn = 0; Index: openmp/libomptarget/test/offloading/bug51781.c =================================================================== --- /dev/null +++ openmp/libomptarget/test/offloading/bug51781.c @@ -0,0 +1,15 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include +int main() { + int x = 0, y = 1; + #pragma omp target teams num_teams(1) map(tofrom:x, y) + { + x = 5; + #pragma omp parallel + y = 6; + } + // CHECK: 5, 6 + printf("%d, %d\n", x, y); + return 0; +}