Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Standalone View
runtime/src/kmp_tasking.cpp
Show First 20 Lines • Show All 1,593 Lines • ▼ Show 20 Lines | static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, | ||||
kmp_info_t *thread; | kmp_info_t *thread; | ||||
int thread_finished = FALSE; | int thread_finished = FALSE; | ||||
KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); | KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); | ||||
KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref)); | KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref)); | ||||
if (__kmp_tasking_mode != tskm_immediate_exec) { | if (__kmp_tasking_mode != tskm_immediate_exec) { | ||||
thread = __kmp_threads[gtid]; | thread = __kmp_threads[gtid]; | ||||
taskdata = thread->th.th_current_task; | taskdata = thread->th.th_current_task; | ||||
hbae: Can we change the function name to `__kmp_omp_taskloop_task`?
`__kmpc_*` is usually for ABI functions.
#if OMPT_SUPPORT && OMPT_OPTIONAL | #if OMPT_SUPPORT && OMPT_OPTIONAL | ||||
ompt_data_t *my_task_data; | ompt_data_t *my_task_data; | ||||
ompt_data_t *my_parallel_data; | ompt_data_t *my_parallel_data; | ||||
if (ompt) { | if (ompt) { | ||||
my_task_data = &(taskdata->ompt_task_info.task_data); | my_task_data = &(taskdata->ompt_task_info.task_data); | ||||
my_parallel_data = OMPT_CUR_TEAM_DATA(thread); | my_parallel_data = OMPT_CUR_TEAM_DATA(thread); | ||||
▲ Show 20 Lines • Show All 2,135 Lines • ▼ Show 20 Lines | for (i = 0; i < num_tasks; ++i) { | ||||
if (ptask_dup != NULL) // set lastprivate flag, construct fistprivates, etc. | if (ptask_dup != NULL) // set lastprivate flag, construct fistprivates, etc. | ||||
ptask_dup(next_task, task, lastpriv); | ptask_dup(next_task, task, lastpriv); | ||||
KA_TRACE(40, | KA_TRACE(40, | ||||
("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, " | ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, " | ||||
"upper %lld stride %lld, (offsets %p %p)\n", | "upper %lld stride %lld, (offsets %p %p)\n", | ||||
gtid, i, next_task, lower, upper, st, | gtid, i, next_task, lower, upper, st, | ||||
next_task_bounds.get_lower_offset(), | next_task_bounds.get_lower_offset(), | ||||
next_task_bounds.get_upper_offset())); | next_task_bounds.get_upper_offset())); | ||||
__kmp_omp_task(gtid, next_task, true); // schedule new task | __kmpc_omp_task(NULL, gtid, next_task); // schedule new task | ||||
protze.joachim: same as below
lower = upper + st; // adjust lower bound for the next iteration | lower = upper + st; // adjust lower bound for the next iteration | ||||
} | } | ||||
// free the pattern task and exit | // free the pattern task and exit | ||||
__kmp_task_start(gtid, task, current_task); // make internal bookkeeping | __kmp_task_start(gtid, task, current_task); // make internal bookkeeping | ||||
// do not execute the pattern task, just do internal bookkeeping | // do not execute the pattern task, just do internal bookkeeping | ||||
__kmp_task_finish(gtid, task, current_task); | __kmp_task_finish(gtid, task, current_task); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 139 Lines • ▼ Show 20 Lines | #endif | ||||
p->task_dup = task_dup; | p->task_dup = task_dup; | ||||
p->st = st; | p->st = st; | ||||
p->ub_glob = ub_glob; | p->ub_glob = ub_glob; | ||||
p->num_tasks = n_tsk1; | p->num_tasks = n_tsk1; | ||||
p->grainsize = grainsize; | p->grainsize = grainsize; | ||||
p->extras = ext1; | p->extras = ext1; | ||||
p->tc = tc1; | p->tc = tc1; | ||||
p->num_t_min = num_t_min; | p->num_t_min = num_t_min; | ||||
__kmp_omp_task(gtid, new_task, true); // schedule new task | __kmpc_omp_task(NULL, gtid, new_task); // schedule new task | ||||
protze.joachim: We somehow need to pass a return-address in here. So probably a new variant of `__kmpc_omp_task`, which takes the address and adds it to the ompt-callback invocation?
// execute the 1st half of current subrange | // execute the 1st half of current subrange | ||||
if (n_tsk0 > num_t_min) | if (n_tsk0 > num_t_min) | ||||
__kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0, | __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0, | ||||
ext0, tc0, num_t_min, task_dup); | ext0, tc0, num_t_min, task_dup); | ||||
else | else | ||||
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, | __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, | ||||
gr_size0, ext0, tc0, task_dup); | gr_size0, ext0, tc0, task_dup); | ||||
[Not Done] hbae: I think it is better to keep the original code (`__kmp_omp_task()` call) at this line.
KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid)); | KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid)); | ||||
} | } | ||||
/*! | /*! | ||||
@ingroup TASKING | @ingroup TASKING | ||||
@param loc Source location information | @param loc Source location information | ||||
@param gtid Global thread ID | @param gtid Global thread ID | ||||
Show All 10 Lines | |||||
Execute the taskloop construct. | Execute the taskloop construct. | ||||
*/ | */ | ||||
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, | void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, | ||||
kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, | kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, | ||||
int sched, kmp_uint64 grainsize, void *task_dup) { | int sched, kmp_uint64 grainsize, void *task_dup) { | ||||
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); | kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); | ||||
KMP_DEBUG_ASSERT(task != NULL); | KMP_DEBUG_ASSERT(task != NULL); | ||||
#if OMPT_SUPPORT && OMPT_OPTIONAL | |||||
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); | |||||
ompt_task_info_t *task_info = __ompt_get_task_info_object(0); | |||||
if (ompt_enabled.ompt_callback_work) { | |||||
ompt_callbacks.ompt_callback(ompt_callback_work)( | |||||
ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), | |||||
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); | |||||
} | |||||
#endif | |||||
if (nogroup == 0) { | if (nogroup == 0) { | ||||
protze.joachim: Shouldn't there be st(ride) in the formula?
AndreyChurbanov: Why not use the iteration count `tc` that is calculated below? It is computed at lines 3980 - 3995, including the check against zero.
There is a difference between the iteration count and the task count.
tc seems to be the task count, not the iteration count. And st(ride) is not needed for the calculation of the iteration count, right? sconvent: There is a difference between the iteration count and the task count.
From the spec (TR6 page… | |||||
AndreyChurbanov: No, `tc` stands for "trace count", which is exactly the iteration count. And the stride is important here.
sconvent: You're right, I updated the diff to use `tc`.
#if OMPT_SUPPORT && OMPT_OPTIONAL | #if OMPT_SUPPORT && OMPT_OPTIONAL | ||||
OMPT_STORE_RETURN_ADDRESS(gtid); | OMPT_STORE_RETURN_ADDRESS(gtid); | ||||
#endif | #endif | ||||
__kmpc_taskgroup(loc, gtid); | __kmpc_taskgroup(loc, gtid); | ||||
} | } | ||||
// ========================================================================= | // ========================================================================= | ||||
// calculate loop parameters | // calculate loop parameters | ||||
Show All 23 Lines | #endif | ||||
if (tc == 0) { | if (tc == 0) { | ||||
KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid)); | KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid)); | ||||
// free the pattern task and exit | // free the pattern task and exit | ||||
__kmp_task_start(gtid, task, current_task); | __kmp_task_start(gtid, task, current_task); | ||||
// do not execute anything for zero-trip loop | // do not execute anything for zero-trip loop | ||||
__kmp_task_finish(gtid, task, current_task); | __kmp_task_finish(gtid, task, current_task); | ||||
return; | return; | ||||
} | } | ||||
#if OMPT_SUPPORT && OMPT_OPTIONAL | |||||
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); | |||||
ompt_task_info_t *task_info = __ompt_get_task_info_object(0); | |||||
if (ompt_enabled.ompt_callback_work) { | |||||
ompt_callbacks.ompt_callback(ompt_callback_work)( | |||||
ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), | |||||
&(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); | |||||
} | |||||
#endif | |||||
if (num_tasks_min == 0) | if (num_tasks_min == 0) | ||||
// TODO: can we choose better default heuristic? | // TODO: can we choose better default heuristic? | ||||
num_tasks_min = | num_tasks_min = | ||||
KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE); | KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE); | ||||
// compute num_tasks/grainsize based on the input provided | // compute num_tasks/grainsize based on the input provided | ||||
switch (sched) { | switch (sched) { | ||||
case 0: // no schedule clause specified, we can choose the default | case 0: // no schedule clause specified, we can choose the default | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu" | ||||
gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); | gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); | ||||
#if OMPT_SUPPORT && OMPT_OPTIONAL | #if OMPT_SUPPORT && OMPT_OPTIONAL | ||||
OMPT_STORE_RETURN_ADDRESS(gtid); | OMPT_STORE_RETURN_ADDRESS(gtid); | ||||
#endif | #endif | ||||
__kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, | __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, | ||||
grainsize, extras, tc, task_dup); | grainsize, extras, tc, task_dup); | ||||
} | } | ||||
if (nogroup == 0) { | |||||
#if OMPT_SUPPORT && OMPT_OPTIONAL | |||||
OMPT_STORE_RETURN_ADDRESS(gtid); | |||||
#endif | |||||
__kmpc_end_taskgroup(loc, gtid); | |||||
} | |||||
#if OMPT_SUPPORT && OMPT_OPTIONAL | #if OMPT_SUPPORT && OMPT_OPTIONAL | ||||
if (ompt_enabled.ompt_callback_work) { | if (ompt_enabled.ompt_callback_work) { | ||||
ompt_callbacks.ompt_callback(ompt_callback_work)( | ompt_callbacks.ompt_callback(ompt_callback_work)( | ||||
ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), | ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), | ||||
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); | &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); | ||||
} | } | ||||
#endif | #endif | ||||
if (nogroup == 0) { | |||||
#if OMPT_SUPPORT && OMPT_OPTIONAL | |||||
OMPT_STORE_RETURN_ADDRESS(gtid); | |||||
#endif | |||||
__kmpc_end_taskgroup(loc, gtid); | |||||
} | |||||
KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); | KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); | ||||
} | } | ||||
#endif | #endif |
Can we change the function name to __kmp_omp_taskloop_task?
__kmpc_* is usually for ABI functions.