Index: libomptarget/deviceRTLs/nvptx/src/libcall.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -165,7 +165,7 @@ ASSERT0(LT_FUSSY, isSPMDMode(), "Expected SPMD mode only with uninitialized runtime."); // parallelLevel starts from 0, need to add 1 for correct level. - return parallelLevel + 1; + return parallelLevel[GetThreadIdInBlock() / WARPSIZE] + 1; } int level = 0; omptarget_nvptx_TaskDescr *currTaskDescr = Index: libomptarget/deviceRTLs/nvptx/src/omp_data.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -31,7 +31,7 @@ __device__ __shared__ uint32_t usedMemIdx; __device__ __shared__ uint32_t usedSlotIdx; -__device__ __shared__ uint8_t parallelLevel; +__device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; // Pointer to this team's OpenMP state object __device__ __shared__ Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -406,7 +406,8 @@ omptarget_nvptx_simpleMemoryManager; extern __device__ __shared__ uint32_t usedMemIdx; extern __device__ __shared__ uint32_t usedSlotIdx; -extern __device__ __shared__ uint8_t parallelLevel; +extern __device__ __shared__ uint8_t + parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; extern __device__ __shared__ omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext; Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -95,8 +95,10 @@ // If OMP runtime is not required don't initialize OMP state. setExecutionParameters(Spmd, RuntimeUninitialized); if (GetThreadIdInBlock() == 0) { - parallelLevel = 0; usedSlotIdx = smid() % MAX_SM; + parallelLevel[0] = 0; + } else if (GetThreadIdInBlock() % WARPSIZE == 0) { + parallelLevel[GetThreadIdInBlock() / WARPSIZE] = 0; } __SYNCTHREADS(); return; Index: libomptarget/deviceRTLs/nvptx/src/parallel.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -339,10 +339,12 @@ if (checkRuntimeUninitialized(loc)) { ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); - __SYNCTHREADS(); - if (GetThreadIdInBlock() == 0) - ++parallelLevel; - __SYNCTHREADS(); + unsigned tnum = __ACTIVEMASK(); + int leader = __ffs(tnum) - 1; + __SHFL_SYNC(tnum, leader, leader); + if (GetThreadIdInBlock() % WARPSIZE == leader) + ++parallelLevel[GetThreadIdInBlock() / WARPSIZE]; + __SHFL_SYNC(tnum, leader, leader); return; } @@ -382,10 +384,12 @@ if (checkRuntimeUninitialized(loc)) { ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); - __SYNCTHREADS(); - if (GetThreadIdInBlock() == 0) - --parallelLevel; - __SYNCTHREADS(); + unsigned tnum = __ACTIVEMASK(); + int leader = __ffs(tnum) - 1; + __SHFL_SYNC(tnum, leader, leader); + if (GetThreadIdInBlock() % WARPSIZE == leader) + --parallelLevel[GetThreadIdInBlock() / WARPSIZE]; + __SHFL_SYNC(tnum, leader, leader); return; } @@ -407,7 +411,7 @@ if (checkRuntimeUninitialized(loc)) { ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); - return parallelLevel + 1; + return parallelLevel[GetThreadIdInBlock() / WARPSIZE] + 1; } int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); Index: libomptarget/deviceRTLs/nvptx/src/supporti.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/supporti.h +++ libomptarget/deviceRTLs/nvptx/src/supporti.h @@ -154,7 +154,7 @@ ASSERT0(LT_FUSSY, isSPMDExecutionMode, "Uninitialized runtime with non-SPMD mode."); // For level 2 parallelism all parallel regions are executed sequentially. - if (parallelLevel > 0) + if (parallelLevel[GetThreadIdInBlock() / WARPSIZE] > 0) rc = 0; else rc = GetThreadIdInBlock(); @@ -175,7 +175,7 @@ ASSERT0(LT_FUSSY, isSPMDExecutionMode, "Uninitialized runtime with non-SPMD mode."); // For level 2 parallelism all parallel regions are executed sequentially. - if (parallelLevel > 0) + if (parallelLevel[GetThreadIdInBlock() / WARPSIZE] > 0) rc = 1; else rc = GetNumberOfThreadsInBlock(); Index: libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp =================================================================== --- libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp +++ libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp @@ -5,15 +5,25 @@ int main(void) { int isHost = -1; - int ParallelLevel1, ParallelLevel2 = -1; + int ParallelLevel1, ParallelLevel2 = -1, Count = 0; -#pragma omp target parallel map(from: isHost, ParallelLevel1, ParallelLevel2) +#pragma omp target parallel map(from \ + : isHost, ParallelLevel1, ParallelLevel2) \ + map(tofrom \ + : Count) num_threads(128) { isHost = omp_is_initial_device(); ParallelLevel1 = omp_get_level(); -#pragma omp parallel for schedule(dynamic) lastprivate(ParallelLevel2) - for (int I = 0; I < 10; ++I) - ParallelLevel2 = omp_get_level(); + if (omp_get_thread_num() > 17) { + int Cnt; +#pragma omp parallel for schedule(dynamic) lastprivate(ParallelLevel2) reduction(+: Cnt) + for (int I = 0; I < 10; ++I) { + ParallelLevel2 = omp_get_level(); + Cnt += omp_get_level(); + } +#pragma omp critical + Count += Cnt; + } } if (isHost < 0) { @@ -25,6 +35,9 @@ // CHECK: Parallel level in SPMD mode: L1 is 1, L2 is 2 printf("Parallel level in SPMD mode: L1 is %d, L2 is %d\n", ParallelLevel1, ParallelLevel2); + // 2200 = 2 (par_level) * 10 (iterations in loop) * 110 (num_threads performing L2 parallel region) + // CHECK: Parallel counter is 2200 + printf("Parallel counter is %d\n", Count); return isHost; }