Changeset View
Changeset View
Standalone View
Standalone View
openmp/libomptarget/deviceRTLs/common/src/parallel.cu
Show First 20 Lines • Show All 288 Lines • ▼ Show 20 Lines | EXTERN void __kmpc_parallel_51(kmp_Ident *ident, kmp_int32 global_tid, | ||||
int proc_bind, void *fn, void *wrapper_fn, | int proc_bind, void *fn, void *wrapper_fn, | ||||
void **args, size_t nargs) { | void **args, size_t nargs) { | ||||
// Handle the serialized case first, same for SPMD/non-SPMD except that in | // Handle the serialized case first, same for SPMD/non-SPMD except that in | ||||
// SPMD mode we already incremented the parallel level counter, account for | // SPMD mode we already incremented the parallel level counter, account for | ||||
// that. | // that. | ||||
bool InParallelRegion = | bool InParallelRegion = | ||||
(__kmpc_parallel_level(ident, global_tid) > __kmpc_is_spmd_exec_mode()); | (__kmpc_parallel_level(ident, global_tid) > __kmpc_is_spmd_exec_mode()); | ||||
if (!if_expr || InParallelRegion) { | if (!if_expr || InParallelRegion) { | ||||
jdoerfert: ^^^ | |||||
JonChesterfield (Author, Unsubmitted): This is the branch that can be folded after D105699, letting this whole function call turn into a tail call to invoke_microtask. It is unaffected by the code deleted in this patch. | |||||
__kmpc_serialized_parallel(ident, global_tid); | __kmpc_serialized_parallel(ident, global_tid); | ||||
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs); | __kmp_invoke_microtask(global_tid, 0, fn, args, nargs); | ||||
__kmpc_end_serialized_parallel(ident, global_tid); | __kmpc_end_serialized_parallel(ident, global_tid); | ||||
return; | return; | ||||
} | } | ||||
if (__kmpc_is_spmd_exec_mode()) { | if (__kmpc_is_spmd_exec_mode()) { | ||||
__kmp_invoke_microtask(global_tid, 0, fn, args, nargs); | __kmp_invoke_microtask(global_tid, 0, fn, args, nargs); | ||||
Show All 11 Lines | if (nargs) { | ||||
__kmpc_begin_sharing_variables(&GlobalArgs, nargs); | __kmpc_begin_sharing_variables(&GlobalArgs, nargs); | ||||
// TODO: faster memcpy? | // TODO: faster memcpy? | ||||
for (int I = 0; I < nargs; I++) | for (int I = 0; I < nargs; I++) | ||||
GlobalArgs[I] = args[I]; | GlobalArgs[I] = args[I]; | ||||
} | } | ||||
// TODO: what if that's a parallel region with a single thread? this is | // TODO: what if that's a parallel region with a single thread? this is | ||||
// considered not active in the existing implementation. | // considered not active in the existing implementation. | ||||
bool IsActiveParallelRegion = threadsInTeam != 1; | |||||
int NumWarps = | |||||
threadsInTeam / WARPSIZE + ((threadsInTeam % WARPSIZE) ? 1 : 0); | |||||
// Increment parallel level for non-SPMD warps. | |||||
for (int I = 0; I < NumWarps; ++I) | |||||
parallelLevel[I] += | |||||
(1 + (IsActiveParallelRegion ? OMP_ACTIVE_PARALLEL_LEVEL : 0)); | |||||
// Master signals work to activate workers. | // Master signals work to activate workers. | ||||
__kmpc_barrier_simple_spmd(nullptr, 0); | __kmpc_barrier_simple_spmd(nullptr, 0); | ||||
JonChesterfield (Author, Unsubmitted): Checked the implementation of this; it does not access parallelLevel. | |||||
// OpenMP [2.5, Parallel Construct, p.49] | // OpenMP [2.5, Parallel Construct, p.49] | ||||
// There is an implied barrier at the end of a parallel region. After the | // There is an implied barrier at the end of a parallel region. After the | ||||
// end of a parallel region, only the master thread of the team resumes | // end of a parallel region, only the master thread of the team resumes | ||||
// execution of the enclosing task region. | // execution of the enclosing task region. | ||||
// | // | ||||
// The master waits at this barrier until all workers are done. | // The master waits at this barrier until all workers are done. | ||||
__kmpc_barrier_simple_spmd(nullptr, 0); | __kmpc_barrier_simple_spmd(nullptr, 0); | ||||
// Decrement parallel level for non-SPMD warps. | |||||
for (int I = 0; I < NumWarps; ++I) | |||||
parallelLevel[I] -= | |||||
(1 + (IsActiveParallelRegion ? OMP_ACTIVE_PARALLEL_LEVEL : 0)); | |||||
// TODO: Is synchronization needed since out of parallel execution? | // TODO: Is synchronization needed since out of parallel execution? | ||||
if (nargs) | if (nargs) | ||||
__kmpc_end_sharing_variables(); | __kmpc_end_sharing_variables(); | ||||
// TODO: proc_bind is a noop? | // TODO: proc_bind is a noop? | ||||
// if (proc_bind != proc_bind_default) | // if (proc_bind != proc_bind_default) | ||||
// __kmpc_push_proc_bind(ident, global_tid, proc_bind); | // __kmpc_push_proc_bind(ident, global_tid, proc_bind); | ||||
} | } | ||||
#pragma omp end declare target | #pragma omp end declare target |
^^^