diff --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu --- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu @@ -204,15 +204,15 @@ INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId, kmp_sched_t schedule, T lb, T ub, ST st, ST chunk) { - if (checkRuntimeUninitialized(loc)) { + if (isRuntimeUninitialized()) { // In SPMD mode no need to check parallelism level - dynamic scheduling // may appear only in L2 parallel regions with lightweight runtime. - ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode."); + ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected non-SPMD mode."); return; } - int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid); - T tnum = GetNumberOfOmpThreads(checkSPMDMode(loc)); + T tnum = GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()); T tripCount = ub - lb + 1; // +1 because ub is inclusive ASSERT0(LT_FUSSY, threadId < tnum, "current thread is not needed here; error"); @@ -441,10 +441,10 @@ INLINE static int dispatch_next(kmp_Ident *loc, int32_t gtid, int32_t *plast, T *plower, T *pupper, ST *pstride) { - if (checkRuntimeUninitialized(loc)) { + if (isRuntimeUninitialized()) { // In SPMD mode no need to check parallelism level - dynamic scheduling // may appear only in L2 parallel regions with lightweight runtime. 
- ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode."); + ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected non-SPMD mode."); if (*plast) return DISPATCH_FINISHED; *plast = 1; @@ -453,8 +453,8 @@ // ID of a thread in its own warp // automatically selects thread or warp ID based on selected implementation - int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); - ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(checkSPMDMode(loc)), + int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); + ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()), "current thread is not needed here; error"); // retrieve schedule kmp_sched_t schedule = @@ -624,7 +624,7 @@ PRINT0(LD_IO, "call kmpc_for_static_init_4\n"); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, - checkSPMDMode(loc)); + __kmpc_is_spmd_exec_mode()); } EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid, @@ -635,7 +635,7 @@ PRINT0(LD_IO, "call kmpc_for_static_init_4u\n"); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, - checkSPMDMode(loc)); + __kmpc_is_spmd_exec_mode()); } EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid, @@ -646,7 +646,7 @@ PRINT0(LD_IO, "call kmpc_for_static_init_8\n"); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, - checkSPMDMode(loc)); + __kmpc_is_spmd_exec_mode()); } EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid, @@ -657,7 +657,7 @@ PRINT0(LD_IO, "call kmpc_for_static_init_8u\n"); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, - checkSPMDMode(loc)); + __kmpc_is_spmd_exec_mode()); } EXTERN diff --git a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu --- 
a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu @@ -181,14 +181,14 @@ IncParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask()); - if (checkRuntimeUninitialized(loc)) { - ASSERT0(LT_FUSSY, checkSPMDMode(loc), + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected SPMD mode with uninitialized runtime."); return; } // assume this is only called for nested parallel - int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); // unlike actual parallel, threads in the same team do not share // the workTaskDescr in this case and num threads is fixed to 1 @@ -220,14 +220,14 @@ DecParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask()); - if (checkRuntimeUninitialized(loc)) { - ASSERT0(LT_FUSSY, checkSPMDMode(loc), + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected SPMD mode with uninitialized runtime."); return; } // pop stack - int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(threadId); // set new top omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr( @@ -249,8 +249,8 @@ // it's cheap to recalculate this value so we never use the result // of this call. 
EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) { - int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); - return GetOmpThreadId(tid, checkSPMDMode(loc)); + int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); + return GetOmpThreadId(tid, __kmpc_is_spmd_exec_mode()); } //////////////////////////////////////////////////////////////////////////////// @@ -260,9 +260,9 @@ EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid, int32_t num_threads) { PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads); - ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), + ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); - tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) = num_threads; } diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu --- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu @@ -159,11 +159,11 @@ kmp_InterWarpCopyFctPtr cpyFct) { return nvptx_parallel_reduce_nowait( global_tid, num_vars, reduce_size, reduce_data, shflFct, cpyFct, - checkSPMDMode(loc), checkRuntimeUninitialized(loc)); + __kmpc_is_spmd_exec_mode(), isRuntimeUninitialized()); } INLINE static bool isMaster(kmp_Ident *loc, uint32_t ThreadId) { - return checkGenericMode(loc) || IsTeamMaster(ThreadId); + return !__kmpc_is_spmd_exec_mode() || IsTeamMaster(ThreadId); } INLINE static uint32_t roundToWarpsize(uint32_t s) { @@ -184,16 +184,16 @@ kmp_ListGlobalFctPtr glredFct) { // Terminate all threads in non-SPMD mode except for the master thread. 
- if (checkGenericMode(loc) && GetThreadIdInBlock() != GetMasterThreadID()) + if (!__kmpc_is_spmd_exec_mode() && GetThreadIdInBlock() != GetMasterThreadID()) return 0; - uint32_t ThreadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + uint32_t ThreadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); // In non-generic mode all workers participate in the teams reduction. // In generic mode only the team master participates in the teams // reduction because the workers are waiting for parallel work. uint32_t NumThreads = - checkSPMDMode(loc) ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true) + __kmpc_is_spmd_exec_mode() ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true) : /*Master thread only*/ 1; uint32_t TeamId = GetBlockIdInKernel(); uint32_t NumTeams = GetNumberOfBlocksInKernel(); @@ -225,7 +225,7 @@ ChunkTeamCount = __kmpc_atomic_inc((uint32_t *)&Cnt, num_of_records - 1u); } // Synchronize - if (checkSPMDMode(loc)) + if (__kmpc_is_spmd_exec_mode()) __kmpc_barrier(loc, global_tid); // reduce_data is global or shared so before being reduced within the diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu --- a/openmp/libomptarget/deviceRTLs/common/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu @@ -34,57 +34,6 @@ return (execution_param & RuntimeMask) == RuntimeInitialized; } -//////////////////////////////////////////////////////////////////////////////// -// Execution Modes based on location parameter fields -//////////////////////////////////////////////////////////////////////////////// - -bool checkSPMDMode(kmp_Ident *loc) { - if (!loc) - return __kmpc_is_spmd_exec_mode(); - - // If SPMD is true then we are not in the UNDEFINED state so - // we can return immediately. - if (loc->reserved_2 & KMP_IDENT_SPMD_MODE) - return true; - - // If not in SPMD mode and runtime required is a valid - // combination of flags so we can return immediately. 
- if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE)) - return false; - - // We are in underfined state. - return __kmpc_is_spmd_exec_mode(); -} - -bool checkGenericMode(kmp_Ident *loc) { return !checkSPMDMode(loc); } - -bool checkRuntimeUninitialized(kmp_Ident *loc) { - if (!loc) - return isRuntimeUninitialized(); - - // If runtime is required then we know we can't be - // in the undefined mode. We can return immediately. - if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE)) - return false; - - // If runtime is required then we need to check is in - // SPMD mode or not. If not in SPMD mode then we end - // up in the UNDEFINED state that marks the orphaned - // functions. - if (loc->reserved_2 & KMP_IDENT_SPMD_MODE) - return true; - - // Check if we are in an UNDEFINED state. Undefined is denoted by - // non-SPMD + noRuntimeRequired which is a combination that - // cannot actually happen. Undefined states is used to mark orphaned - // functions. - return isRuntimeUninitialized(); -} - -bool checkRuntimeInitialized(kmp_Ident *loc) { - return !checkRuntimeUninitialized(loc); -} - //////////////////////////////////////////////////////////////////////////////// // support: get info from machine //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu --- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu @@ -42,16 +42,16 @@ } EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) { - if (checkRuntimeUninitialized(loc_ref)) { - ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref), + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected SPMD mode with uninitialized runtime."); __kmpc_barrier_simple_spmd(loc_ref, tid); } else { - tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref)); + tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); int 
numberOfActiveOMPThreads = - GetNumberOfOmpThreads(checkSPMDMode(loc_ref)); + GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()); if (numberOfActiveOMPThreads > 1) { - if (checkSPMDMode(loc_ref)) { + if (__kmpc_is_spmd_exec_mode()) { __kmpc_barrier_simple_spmd(loc_ref, tid); } else { // The #threads parameter must be rounded up to the WARPSIZE. diff --git a/openmp/libomptarget/deviceRTLs/common/src/task.cu b/openmp/libomptarget/deviceRTLs/common/src/task.cu --- a/openmp/libomptarget/deviceRTLs/common/src/task.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/task.cu @@ -83,7 +83,7 @@ void *noAliasDepList) { PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n", P64(newKmpTaskDescr)); - ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), + ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); // 1. get explicit task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = @@ -96,7 +96,7 @@ "bad assumptions"); // 2. push new context: update new task descriptor - int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid); newTaskDescr->CopyForExplicitTask(parentTaskDescr); // set new task descriptor as top @@ -122,7 +122,7 @@ kmp_TaskDescr *newKmpTaskDescr) { PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n", (unsigned long long)newKmpTaskDescr); - ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), + ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); // 1. get explicit task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = @@ -135,7 +135,7 @@ "bad assumptions"); // 2. 
push new context: update new task descriptor - int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid); newTaskDescr->CopyForExplicitTask(parentTaskDescr); // set new task descriptor as top @@ -148,7 +148,7 @@ kmp_TaskDescr *newKmpTaskDescr) { PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n", (unsigned long long)newKmpTaskDescr); - ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), + ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); // 1. get explicit task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = @@ -163,7 +163,7 @@ omptarget_nvptx_TaskDescr *parentTaskDescr = newTaskDescr->GetPrevTaskDescr(); // 3... noting to call... is inline // 4. pop context - int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc)); + int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode()); omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, parentTaskDescr); // 5. free diff --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/common/support.h --- a/openmp/libomptarget/deviceRTLs/common/support.h +++ b/openmp/libomptarget/deviceRTLs/common/support.h @@ -36,15 +36,6 @@ bool isRuntimeUninitialized(); bool isRuntimeInitialized(); -//////////////////////////////////////////////////////////////////////////////// -// Execution Modes based on location parameter fields -//////////////////////////////////////////////////////////////////////////////// - -bool checkSPMDMode(kmp_Ident *loc); -bool checkGenericMode(kmp_Ident *loc); -bool checkRuntimeUninitialized(kmp_Ident *loc); -bool checkRuntimeInitialized(kmp_Ident *loc); - //////////////////////////////////////////////////////////////////////////////// // get info from machine ////////////////////////////////////////////////////////////////////////////////