Make omp_get_thread_limit() return the block size in SPMD mode.

Before this change the function returned 0 whenever the runtime was
uninitialized (asserting that this only happens in SPMD mode). It now returns
GetNumberOfThreadsInBlock() for every SPMD-mode call and, otherwise, reads
ThreadLimit() from the top task descriptor, which is always fetched with
isSPMDExecutionMode=false. The new thread_limit.c test prints the value
observed at up to three nesting levels for a non-SPMD region (expects 64) and
for two SPMD regions (expects 32).

Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -73,14 +73,11 @@
 }
 
 EXTERN int omp_get_thread_limit(void) {
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, isSPMDMode(),
-            "Expected SPMD mode only with uninitialized runtime.");
-    return 0; // default is 0
-  }
+  if (isSPMDMode())
+    return GetNumberOfThreadsInBlock();
   // per contention group.. meaning threads in current team
   omptarget_nvptx_TaskDescr *currTaskDescr =
-      getMyTopTaskDescriptor(isSPMDMode());
+      getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
   int rc = currTaskDescr->ThreadLimit();
   PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc);
   return rc;
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c
@@ -0,0 +1,72 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+  int ThreadLimitL0 = -1, ThreadLimitL1 = -1, ThreadLimitL2 = -1;
+
+#pragma omp declare reduction(unique64:int \
+                              : omp_out = (omp_in == 64 ? omp_in : omp_out)) \
+    initializer(omp_priv = -1)
+#pragma omp declare reduction(unique32:int \
+                              : omp_out = (omp_in == 32 ? omp_in : omp_out)) \
+    initializer(omp_priv = -1)
+
+  // Non-SPMD mode.
+#pragma omp target teams map(ThreadLimitL0, ThreadLimitL1, ThreadLimitL2) \
+    thread_limit(64) num_teams(1)
+  {
+    ThreadLimitL0 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique64 \
+                               : ThreadLimitL1, ThreadLimitL2) num_threads(32)
+    {
+      ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique64 : ThreadLimitL2)
+      { ThreadLimitL2 = omp_get_thread_limit(); }
+    }
+  }
+
+  // CHECK: Non-SPMD ThreadLimitL0 = 64
+  printf("Non-SPMD ThreadLimitL0 = %d\n", ThreadLimitL0);
+  // CHECK: Non-SPMD ThreadLimitL1 = 64
+  printf("Non-SPMD ThreadLimitL1 = %d\n", ThreadLimitL1);
+  // CHECK: Non-SPMD ThreadLimitL2 = 64
+  printf("Non-SPMD ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+  // SPMD mode with full runtime
+  ThreadLimitL1 = -1;
+  ThreadLimitL2 = -1;
+#pragma omp target parallel reduction(unique32 \
+                                      : ThreadLimitL1, ThreadLimitL2) \
+    num_threads(32)
+  {
+    ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique32 : ThreadLimitL2)
+    { ThreadLimitL2 = omp_get_thread_limit(); }
+  }
+
+  // CHECK: SPMD with full runtime ThreadLimitL1 = 32
+  printf("SPMD with full runtime ThreadLimitL1 = %d\n", ThreadLimitL1);
+  // CHECK: SPMD with full runtime ThreadLimitL2 = 32
+  printf("SPMD with full runtime ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+  // SPMD mode without runtime
+  ThreadLimitL1 = -1;
+  ThreadLimitL2 = -1;
+#pragma omp target parallel for reduction(unique32 \
+                                          : ThreadLimitL1, ThreadLimitL2) \
+    num_threads(32)
+  for (int I = 0; I < 2; ++I) {
+    ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique32 : ThreadLimitL2)
+    { ThreadLimitL2 = omp_get_thread_limit(); }
+  }
+
+  // CHECK: SPMD without runtime ThreadLimitL1 = 32
+  printf("SPMD without runtime ThreadLimitL1 = %d\n", ThreadLimitL1);
+  // CHECK: SPMD without runtime ThreadLimitL2 = 32
+  printf("SPMD without runtime ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+  return 0;
+}