Changeset View
Changeset View
Standalone View
Standalone View
openmp/libomptarget/deviceRTLs/common/src/parallel.cu
Show First 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | if (NumThreads < WARPSIZE) { | ||||
NumThreads = (NumThreads & ~((uint16_t)WARPSIZE - 1)); | NumThreads = (NumThreads & ~((uint16_t)WARPSIZE - 1)); | ||||
} | } | ||||
#endif | #endif | ||||
return NumThreads; | return NumThreads; | ||||
} | } | ||||
// This routine is always called by the team master. | // This routine is always called by the team master. | ||||
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn) { | EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, | ||||
kmp_int32 NumThreadsClause) { | |||||
PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n"); | PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n"); | ||||
omptarget_nvptx_workFn = WorkFn; | omptarget_nvptx_workFn = WorkFn; | ||||
// This routine is only called by the team master. The team master is | // This routine is only called by the team master. The team master is | ||||
// the first thread of the last warp. It always has the logical thread | // the first thread of the last warp. It always has the logical thread | ||||
// id of 0 (since it is a shadow for the first worker thread). | // id of 0 (since it is a shadow for the first worker thread). | ||||
const int threadId = 0; | const int threadId = 0; | ||||
omptarget_nvptx_TaskDescr *currTaskDescr = | omptarget_nvptx_TaskDescr *currTaskDescr = | ||||
omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId); | omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId); | ||||
ASSERT0(LT_FUSSY, currTaskDescr, "expected a top task descr"); | ASSERT0(LT_FUSSY, currTaskDescr, "expected a top task descr"); | ||||
ASSERT0(LT_FUSSY, !currTaskDescr->InParallelRegion(), | ASSERT0(LT_FUSSY, !currTaskDescr->InParallelRegion(), | ||||
"cannot be called in a parallel region."); | "cannot be called in a parallel region."); | ||||
if (currTaskDescr->InParallelRegion()) { | if (currTaskDescr->InParallelRegion()) { | ||||
PRINT0(LD_PAR, "already in parallel: go seq\n"); | PRINT0(LD_PAR, "already in parallel: go seq\n"); | ||||
return; | return; | ||||
} | } | ||||
uint16_t &NumThreadsClause = | |||||
omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(threadId); | |||||
uint16_t NumThreads = | uint16_t NumThreads = | ||||
determineNumberOfThreads(NumThreadsClause, nThreads, threadLimit); | determineNumberOfThreads(NumThreadsClause, nThreads, threadLimit); | ||||
if (NumThreadsClause != 0) { | if (NumThreadsClause != 0) { | ||||
// Reset request to avoid propagating to successive #parallel | // Reset request to avoid propagating to successive #parallel | ||||
NumThreadsClause = 0; | NumThreadsClause = 0; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 144 Lines • ▼ Show 20 Lines | |||||
EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) { | EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) { | ||||
return GetOmpThreadId(); | return GetOmpThreadId(); | ||||
} | } | ||||
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||
// push params | // push params | ||||
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||
EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid, | |||||
int32_t num_threads) { | |||||
PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads); | |||||
ASSERT0(LT_FUSSY, isRuntimeInitialized(), | |||||
"Runtime must be initialized."); | |||||
tid = GetLogicalThreadIdInBlock(); | |||||
omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) = | |||||
num_threads; | |||||
} | |||||
// Do nothing. The host guarantees we started the requested number of | // Do nothing. The host guarantees we started the requested number of | ||||
// teams and we only need inspection of gridDim. | // teams and we only need inspection of gridDim. | ||||
EXTERN void __kmpc_push_num_teams(kmp_Ident *loc, int32_t tid, | EXTERN void __kmpc_push_num_teams(kmp_Ident *loc, int32_t tid, | ||||
int32_t num_teams, int32_t thread_limit) { | int32_t num_teams, int32_t thread_limit) { | ||||
PRINT(LD_IO, "call kmpc_push_num_teams %d\n", (int)num_teams); | PRINT(LD_IO, "call kmpc_push_num_teams %d\n", (int)num_teams); | ||||
ASSERT0(LT_FUSSY, 0, "should never have anything with new teams on device"); | ASSERT0(LT_FUSSY, 0, "should never have anything with new teams on device"); | ||||
} | } | ||||
Show All 23 Lines | if (!if_expr || InParallelRegion) { | ||||
return; | return; | ||||
} | } | ||||
if (__kmpc_is_spmd_exec_mode()) { | if (__kmpc_is_spmd_exec_mode()) { | ||||
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs); | __kmp_invoke_microtask(global_tid, 0, fn, args, nargs); | ||||
return; | return; | ||||
} | } | ||||
// Handle the num_threads clause. | __kmpc_kernel_prepare_parallel((void *)wrapper_fn, num_threads); | ||||
if (num_threads != -1) | |||||
__kmpc_push_num_threads(ident, global_tid, num_threads); | |||||
__kmpc_kernel_prepare_parallel((void *)wrapper_fn); | |||||
if (nargs) { | if (nargs) { | ||||
void **GlobalArgs; | void **GlobalArgs; | ||||
__kmpc_begin_sharing_variables(&GlobalArgs, nargs); | __kmpc_begin_sharing_variables(&GlobalArgs, nargs); | ||||
// TODO: faster memcpy? | // TODO: faster memcpy? | ||||
#pragma unroll | #pragma unroll | ||||
for (int I = 0; I < nargs; I++) | for (int I = 0; I < nargs; I++) | ||||
GlobalArgs[I] = args[I]; | GlobalArgs[I] = args[I]; | ||||
Show All 38 Lines |