Index: libomptarget/deviceRTLs/nvptx/src/omp_data.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -31,7 +31,8 @@ __device__ __shared__ uint32_t usedMemIdx; __device__ __shared__ uint32_t usedSlotIdx; -__device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; +__device__ __shared__ volatile uint8_t + parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; __device__ __shared__ uint16_t threadLimit; __device__ __shared__ uint16_t threadsInTeam; __device__ __shared__ uint16_t nThreads; Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -398,7 +398,7 @@ omptarget_nvptx_simpleMemoryManager; extern __device__ __shared__ uint32_t usedMemIdx; extern __device__ __shared__ uint32_t usedSlotIdx; -extern __device__ __shared__ uint8_t +extern __device__ __shared__ volatile uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; extern __device__ __shared__ uint16_t threadLimit; extern __device__ __shared__ uint16_t threadsInTeam; Index: libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp =================================================================== --- libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp +++ libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp @@ -47,5 +47,15 @@ // CHECK: Expected count = 86 printf("Expected count = %d\n", Count); + Count = 0; +#pragma omp target parallel for reduction(+: Count) schedule(dynamic, 2) num_threads(64) + for (int J = 0; J < 1000; ++J) { + Count += J; + } + + // Final result of Count is 1000 * (0 + 999) / 2 + // CHECK: Expected count with dynamic scheduling = 499500 + printf("Expected count with dynamic scheduling = %d\n", Count); + return isHost; }