Index: libomptarget/deviceRTLs/nvptx/src/libcall.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -70,10 +70,7 @@ EXTERN int omp_get_thread_limit(void) { if (isSPMDMode()) return GetNumberOfThreadsInBlock(); - // per contention group.. meaning threads in current team - omptarget_nvptx_TaskDescr *currTaskDescr = - getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false); - int rc = currTaskDescr->ThreadLimit(); + int rc = threadLimit; PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc); return rc; } Index: libomptarget/deviceRTLs/nvptx/src/omp_data.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -32,7 +32,7 @@ __device__ __shared__ uint32_t usedSlotIdx; __device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; - +__device__ __shared__ uint16_t threadLimit; // Pointer to this team's OpenMP state object __device__ __shared__ omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext; Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -165,7 +165,6 @@ INLINE int IsTaskConstruct() const { return !IsParallelConstruct(); } // methods for other fields INLINE uint16_t &NThreads() { return items.nthreads; } - INLINE uint16_t &ThreadLimit() { return items.threadlimit; } INLINE uint16_t &ThreadId() { return items.threadId; } INLINE uint16_t &ThreadsInTeam() { return items.threadsInTeam; } INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; } @@ -213,7 +212,6 @@ uint8_t flags; // 6 bit used (see flag above) uint8_t unused; uint16_t nthreads; // thread num for subsequent parallel regions - uint16_t threadlimit; // thread limit ICV uint16_t threadId; // thread id uint16_t threadsInTeam; // threads in current team uint64_t runtimeChunkSize; // runtime chunk size @@ -408,6 +406,7 @@ extern __device__ __shared__ uint32_t usedSlotIdx; extern __device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; +extern __device__ __shared__ uint16_t threadLimit; extern __device__ __shared__ omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext; Index: libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -74,7 +74,7 @@ omptarget_nvptx_TaskDescr *currTaskDescr = omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId); currTaskDescr->NThreads() = GetNumberOfWorkersInTeam(); - currTaskDescr->ThreadLimit() = ThreadLimit; + threadLimit = ThreadLimit; } EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized) { @@ -139,7 +139,6 @@ ASSERT0(LT_FUSSY, newTaskDescr, "expected a task descr"); newTaskDescr->InitLevelOneTaskDescr(ThreadLimit, currTeamDescr.LevelZeroTaskDescr()); - newTaskDescr->ThreadLimit() = ThreadLimit; // install new top descriptor omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId, newTaskDescr); Index: libomptarget/deviceRTLs/nvptx/src/parallel.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -249,9 +249,8 @@ uint16_t &NumThreadsClause = omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(threadId); - uint16_t NumThreads = - determineNumberOfThreads(NumThreadsClause, currTaskDescr->NThreads(), - currTaskDescr->ThreadLimit()); + uint16_t NumThreads = determineNumberOfThreads( + NumThreadsClause, currTaskDescr->NThreads(), threadLimit); if (NumThreadsClause != 0) { // Reset request to avoid propagating to successive #parallel