Index: libomptarget/deviceRTLs/nvptx/src/libcall.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -165,7 +165,7 @@ if (isRuntimeUninitialized()) { ASSERT0(LT_FUSSY, isSPMDMode(), "Expected SPMD mode only with uninitialized runtime."); - return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel(); + return parallelLevel; } int level = 0; omptarget_nvptx_TaskDescr *currTaskDescr = Index: libomptarget/deviceRTLs/nvptx/src/omp_data.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -27,22 +27,17 @@ omptarget_nvptx_Queue omptarget_nvptx_device_State[MAX_SM]; -__device__ omptarget_nvptx_Queue - omptarget_nvptx_device_simpleState[MAX_SM]; - __device__ omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager; __device__ __shared__ uint32_t usedMemIdx; __device__ __shared__ uint32_t usedSlotIdx; +__device__ __shared__ uint8_t parallelLevel; + // Pointer to this team's OpenMP state object __device__ __shared__ omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext; -__device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext - *omptarget_nvptx_simpleThreadPrivateContext; - //////////////////////////////////////////////////////////////////////////////// // The team master sets the outlined parallel function in this variable to // communicate with the workers. Since it is in shared memory, there is one Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -391,39 +391,6 @@ INLINE const void *Acquire(const void *buf, size_t size); }; -class omptarget_nvptx_SimpleThreadPrivateContext { - uint16_t par_level[MAX_THREADS_PER_TEAM]; - -public: - INLINE void Init() { - ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(), - "Expected SPMD + uninitialized runtime modes."); - par_level[GetThreadIdInBlock()] = 0; - } - INLINE void IncParLevel() { - ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(), - "Expected SPMD + uninitialized runtime modes."); - ++par_level[GetThreadIdInBlock()]; - } - INLINE void DecParLevel() { - ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(), - "Expected SPMD + uninitialized runtime modes."); - ASSERT0(LT_FUSSY, par_level[GetThreadIdInBlock()] > 0, - "Expected parallel level >0."); - --par_level[GetThreadIdInBlock()]; - } - INLINE bool InL2OrHigherParallelRegion() const { - ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(), - "Expected SPMD + uninitialized runtime modes."); - return par_level[GetThreadIdInBlock()] > 0; - } - INLINE uint16_t GetParallelLevel() const { - ASSERT0(LT_FUSSY, isSPMDMode() && isRuntimeUninitialized(), - "Expected SPMD + uninitialized runtime modes."); - return par_level[GetThreadIdInBlock()] + 1; - } -}; - //////////////////////////////////////////////////////////////////////////////// // global device envrionment //////////////////////////////////////////////////////////////////////////////// @@ -440,10 +407,9 @@ omptarget_nvptx_simpleMemoryManager; extern __device__ __shared__ uint32_t usedMemIdx; extern __device__ __shared__ uint32_t usedSlotIdx; +extern __device__ __shared__ uint8_t parallelLevel; extern __device__ __shared__ omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext; -extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext - *omptarget_nvptx_simpleThreadPrivateContext; extern __device__ __shared__ uint32_t execution_param; extern __device__ __shared__ void *ReductionScratchpadPtr; Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -21,10 +21,6 @@ omptarget_nvptx_Queue omptarget_nvptx_device_State[MAX_SM]; -extern __device__ omptarget_nvptx_Queue< - omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT> - omptarget_nvptx_device_simpleState[MAX_SM]; - //////////////////////////////////////////////////////////////////////////////// // init entry points //////////////////////////////////////////////////////////////////////////////// @@ -100,14 +96,10 @@ // If OMP runtime is not required don't initialize OMP state. setExecutionParameters(Spmd, RuntimeUninitialized); if (GetThreadIdInBlock() == 0) { - int slot = smid() % MAX_SM; - usedSlotIdx = slot; - omptarget_nvptx_simpleThreadPrivateContext = - omptarget_nvptx_device_simpleState[slot].Dequeue(); + parallelLevel = 0; + usedSlotIdx = smid() % MAX_SM; } - // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. __SYNCTHREADS(); - omptarget_nvptx_simpleThreadPrivateContext->Init(); return; } setExecutionParameters(Spmd, RuntimeInitialized); @@ -172,18 +164,12 @@ EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) { // We're not going to pop the task descr stack of each thread since // there are no more parallel regions in SPMD mode. + if (!RequiresOMPRuntime) + return; + // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. __SYNCTHREADS(); int threadId = GetThreadIdInBlock(); - if (!RequiresOMPRuntime) { - if (threadId == 0) { - // Enqueue omp state object for use by another team. - int slot = usedSlotIdx; - omptarget_nvptx_device_simpleState[slot].Enqueue( - omptarget_nvptx_simpleThreadPrivateContext); - } - return; - } if (threadId == 0) { // Enqueue omp state object for use by another team. int slot = usedSlotIdx; Index: libomptarget/deviceRTLs/nvptx/src/parallel.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -340,7 +340,11 @@ if (checkRuntimeUninitialized(loc)) { ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); - omptarget_nvptx_simpleThreadPrivateContext->IncParLevel(); + __SYNCTHREADS(); + if (GetThreadIdInBlock() == 0) + ++parallelLevel; + __SYNCTHREADS(); + return; } @@ -379,7 +383,10 @@ if (checkRuntimeUninitialized(loc)) { ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); - omptarget_nvptx_simpleThreadPrivateContext->DecParLevel(); + __SYNCTHREADS(); + if (GetThreadIdInBlock() == 0) + --parallelLevel; + __SYNCTHREADS(); return; } @@ -401,7 +408,7 @@ if (checkRuntimeUninitialized(loc)) { ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); - return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel(); + return parallelLevel; } int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); Index: libomptarget/deviceRTLs/nvptx/src/supporti.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/supporti.h +++ libomptarget/deviceRTLs/nvptx/src/supporti.h @@ -155,8 +155,7 @@ ASSERT0(LT_FUSSY, isSPMDExecutionMode, "Uninitialized runtime with non-SPMD mode."); // For level 2 parallelism all parallel regions are executed sequentially. - if (omptarget_nvptx_simpleThreadPrivateContext - ->InL2OrHigherParallelRegion()) + if (parallelLevel > 0) rc = 0; else rc = GetThreadIdInBlock(); @@ -177,8 +176,7 @@ ASSERT0(LT_FUSSY, isSPMDExecutionMode, "Uninitialized runtime with non-SPMD mode."); // For level 2 parallelism all parallel regions are executed sequentially. - if (omptarget_nvptx_simpleThreadPrivateContext - ->InL2OrHigherParallelRegion()) + if (parallelLevel > 0) rc = 1; else rc = GetNumberOfThreadsInBlock();