Index: libomptarget/deviceRTLs/nvptx/src/libcall.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -164,7 +164,8 @@ if (isRuntimeUninitialized()) { ASSERT0(LT_FUSSY, isSPMDMode(), "Expected SPMD mode only with uninitialized runtime."); - return parallelLevel; + // parallelLevel starts from 0, need to add 1 for correct level. + return parallelLevel + 1; } int level = 0; omptarget_nvptx_TaskDescr *currTaskDescr = Index: libomptarget/deviceRTLs/nvptx/src/loop.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/loop.cu +++ libomptarget/deviceRTLs/nvptx/src/loop.cu @@ -205,8 +205,12 @@ INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId, kmp_sched_t schedule, T lb, T ub, ST st, ST chunk) { - ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), - "Expected non-SPMD mode + initialized runtime."); + if (checkRuntimeUninitialized(loc)) { + // In SPMD mode no need to check parallelism level - dynamic scheduling + // may appear only in L2 parallel regions with lightweight runtime. + ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode."); + return; + } int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid); T tnum = currTaskDescr->ThreadsInTeam(); @@ -439,8 +443,15 @@ INLINE static int dispatch_next(kmp_Ident *loc, int32_t gtid, int32_t *plast, T *plower, T *pupper, ST *pstride) { - ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), - "Expected non-SPMD mode + initialized runtime."); + if (checkRuntimeUninitialized(loc)) { + // In SPMD mode no need to check parallelism level - dynamic scheduling + // may appear only in L2 parallel regions with lightweight runtime. + ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode."); + if (*plast) + return DISPATCH_FINISHED; + *plast = 1; + return DISPATCH_NOTFINISHED; + } // ID of a thread in its own warp // automatically selects thread or warp ID based on selected implementation Index: libomptarget/deviceRTLs/nvptx/src/parallel.cu =================================================================== --- libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -407,7 +407,7 @@ if (checkRuntimeUninitialized(loc)) { ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); - return parallelLevel; + return parallelLevel + 1; } int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); Index: libomptarget/test/offloading/spmd_parallel_regions.cpp =================================================================== --- /dev/null +++ libomptarget/test/offloading/spmd_parallel_regions.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda + +#include +#include + +int main(void) { + int isHost = -1; + int ParallelLevel1, ParallelLevel2 = -1; + +#pragma omp target parallel map(from: isHost, ParallelLevel1, ParallelLevel2) + { + isHost = omp_is_initial_device(); + ParallelLevel1 = omp_get_level(); +#pragma omp parallel for schedule(dynamic) lastprivate(ParallelLevel2) + for (int I = 0; I < 10; ++I) + ParallelLevel2 = omp_get_level(); + } + + if (isHost < 0) { + printf("Runtime error, isHost=%d\n", isHost); + } + + // CHECK: Target region executed on the device + printf("Target region executed on the %s\n", isHost ? "host" : "device"); + // CHECK: Parallel level in SPMD mode: L1 is 1, L2 is 2 + printf("Parallel level in SPMD mode: L1 is %d, L2 is %d\n", ParallelLevel1, + ParallelLevel2); + + return isHost; +}