Index: openmp/trunk/runtime/src/kmp_tasking.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_tasking.cpp +++ openmp/trunk/runtime/src/kmp_tasking.cpp @@ -1596,6 +1596,61 @@ return res; } +// __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule +// a taskloop task with the correct OMPT return address +// +// loc_ref: location of original task pragma (ignored) +// gtid: Global Thread ID of encountering thread +// new_task: non-thread-switchable task thunk allocated by +// __kmp_omp_task_alloc() +// codeptr_ra: return address for OMPT callback +// Returns: +// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to +// be resumed later. +// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be +// resumed later. +kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task, void *codeptr_ra) { + kmp_int32 res; + KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); + +#if KMP_DEBUG || OMPT_SUPPORT + kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); +#endif + KA_TRACE(10, ("__kmp_omp_taskloop_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, + new_taskdata)); + +#if OMPT_SUPPORT + kmp_taskdata_t *parent = NULL; + if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { + parent = new_taskdata->td_parent; + if (!parent->ompt_task_info.frame.enter_frame) + parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? 
&(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, + codeptr_ra); + } + } +#endif + + res = __kmp_omp_task(gtid, new_task, true); + + KA_TRACE(10, ("__kmp_omp_taskloop_task(exit): T#%d returning " + "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", + gtid, loc_ref, new_taskdata)); +#if OMPT_SUPPORT + if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { + parent->ompt_task_info.frame.enter_frame = NULL; + } +#endif + return res; +} + template <bool ompt> static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, void *frame_address, @@ -3680,23 +3735,28 @@ // __kmp_taskloop_linear: Start tasks of the taskloop linearly // -// loc Source location information -// gtid Global thread ID -// task Pattern task, exposes the loop iteration range -// lb Pointer to loop lower bound in task structure -// ub Pointer to loop upper bound in task structure -// st Loop stride -// ub_glob Global upper bound (used for lastprivate check) -// num_tasks Number of tasks to execute -// grainsize Number of loop iterations per task -// extras Number of chunks with grainsize+1 iterations -// tc Iterations count -// task_dup Tasks duplication routine +// loc Source location information +// gtid Global thread ID +// task Pattern task, exposes the loop iteration range +// lb Pointer to loop lower bound in task structure +// ub Pointer to loop upper bound in task structure +// st Loop stride +// ub_glob Global upper bound (used for lastprivate check) +// num_tasks Number of tasks to execute +// grainsize Number of loop iterations per task +// extras Number of chunks with grainsize+1 iterations +// tc Iterations count +// task_dup Tasks duplication routine +// codeptr_ra Return address for OMPT events void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, kmp_uint64 ub_glob, kmp_uint64 num_tasks, kmp_uint64 grainsize, kmp_uint64 extras, - 
kmp_uint64 tc, void *task_dup) { + kmp_uint64 tc, +#if OMPT_SUPPORT + void *codeptr_ra, +#endif + void *task_dup) { KMP_COUNT_BLOCK(OMP_TASKLOOP); KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling); p_task_dup_t ptask_dup = (p_task_dup_t)task_dup; @@ -3764,7 +3824,12 @@ gtid, i, next_task, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds.get_upper_offset())); +#if OMPT_SUPPORT + __kmp_omp_taskloop_task(NULL, gtid, next_task, + codeptr_ra); // schedule new task +#else __kmp_omp_task(gtid, next_task, true); // schedule new task +#endif lower = upper + st; // adjust lower bound for the next iteration } // free the pattern task and exit @@ -3787,11 +3852,17 @@ kmp_uint64 extras; kmp_uint64 tc; kmp_uint64 num_t_min; +#if OMPT_SUPPORT + void *codeptr_ra; +#endif } __taskloop_params_t; void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *, kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64, +#if OMPT_SUPPORT + void *, +#endif void *); // Execute part of the the taskloop submitted as a task. 
@@ -3810,6 +3881,9 @@ kmp_uint64 extras = p->extras; kmp_uint64 tc = p->tc; kmp_uint64 num_t_min = p->num_t_min; +#if OMPT_SUPPORT + void *codeptr_ra = p->codeptr_ra; +#endif #if KMP_DEBUG kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); KMP_DEBUG_ASSERT(task != NULL); @@ -3821,10 +3895,18 @@ KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min); if (num_tasks > num_t_min) __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, num_t_min, task_dup); + grainsize, extras, tc, num_t_min, +#if OMPT_SUPPORT + codeptr_ra, +#endif + task_dup); else __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, task_dup); + grainsize, extras, tc, +#if OMPT_SUPPORT + codeptr_ra, +#endif + task_dup); KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid)); return 0; @@ -3833,24 +3915,29 @@ // Schedule part of the the taskloop as a task, // execute the rest of the the taskloop. // -// loc Source location information -// gtid Global thread ID -// task Pattern task, exposes the loop iteration range -// lb Pointer to loop lower bound in task structure -// ub Pointer to loop upper bound in task structure -// st Loop stride -// ub_glob Global upper bound (used for lastprivate check) -// num_tasks Number of tasks to execute -// grainsize Number of loop iterations per task -// extras Number of chunks with grainsize+1 iterations -// tc Iterations count -// num_t_min Threashold to launch tasks recursively -// task_dup Tasks duplication routine +// loc Source location information +// gtid Global thread ID +// task Pattern task, exposes the loop iteration range +// lb Pointer to loop lower bound in task structure +// ub Pointer to loop upper bound in task structure +// st Loop stride +// ub_glob Global upper bound (used for lastprivate check) +// num_tasks Number of tasks to execute +// grainsize Number of loop iterations per task +// extras Number of chunks with grainsize+1 iterations +// tc Iterations count 
+// num_t_min Threshold to launch tasks recursively +// task_dup Tasks duplication routine +// codeptr_ra Return address for OMPT events void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, kmp_uint64 ub_glob, kmp_uint64 num_tasks, kmp_uint64 grainsize, kmp_uint64 extras, - kmp_uint64 tc, kmp_uint64 num_t_min, void *task_dup) { + kmp_uint64 tc, kmp_uint64 num_t_min, +#if OMPT_SUPPORT + void *codeptr_ra, +#endif + void *task_dup) { #if KMP_DEBUG kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); KMP_DEBUG_ASSERT(task != NULL); @@ -3920,15 +4007,32 @@ p->extras = ext1; p->tc = tc1; p->num_t_min = num_t_min; - __kmp_omp_task(gtid, new_task, true); // schedule new task +#if OMPT_SUPPORT + p->codeptr_ra = codeptr_ra; +#endif + +#if OMPT_SUPPORT + // schedule new task with correct return address for OMPT events + __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra); +#else + __kmp_omp_task(gtid, new_task, true); // schedule new task +#endif // execute the 1st half of current subrange if (n_tsk0 > num_t_min) __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0, - ext0, tc0, num_t_min, task_dup); + ext0, tc0, num_t_min, +#if OMPT_SUPPORT + codeptr_ra, +#endif + task_dup); else __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, - gr_size0, ext0, tc0, task_dup); + gr_size0, ext0, tc0, +#if OMPT_SUPPORT + codeptr_ra, +#endif + task_dup); KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid)); } @@ -3955,16 +4059,6 @@ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); KMP_DEBUG_ASSERT(task != NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_task_info_object(0); - if (ompt_enabled.ompt_callback_work) { - ompt_callbacks.ompt_callback(ompt_callback_work)( - ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), 0, 
OMPT_GET_RETURN_ADDRESS(0)); - } -#endif - if (nogroup == 0) { #if OMPT_SUPPORT && OMPT_OPTIONAL OMPT_STORE_RETURN_ADDRESS(gtid); @@ -4005,6 +4099,17 @@ __kmp_task_finish(gtid, task, current_task); return; } + +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); + } +#endif + if (num_tasks_min == 0) // TODO: can we choose better default heuristic? num_tasks_min = @@ -4051,47 +4156,51 @@ if (if_val == 0) { // if(0) specified, mark task as serial taskdata->td_flags.task_serial = 1; taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif // always start serial tasks linearly __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, task_dup); + grainsize, extras, tc, +#if OMPT_SUPPORT + OMPT_GET_RETURN_ADDRESS(0), +#endif + task_dup); // !taskdata->td_flags.native => currently force linear spawning of tasks // for GOMP_taskloop } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) { KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu" "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, num_tasks_min, task_dup); + grainsize, extras, tc, num_tasks_min, +#if OMPT_SUPPORT + OMPT_GET_RETURN_ADDRESS(0), +#endif + task_dup); } else { KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu" "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks, 
num_tasks_min, grainsize, extras)); -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, - grainsize, extras, tc, task_dup); - } - - if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); + grainsize, extras, tc, +#if OMPT_SUPPORT + OMPT_GET_RETURN_ADDRESS(0), #endif - __kmpc_end_taskgroup(loc, gtid); + task_dup); } + #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_work) { ompt_callbacks.ompt_callback(ompt_callback_work)( ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), - &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); + &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); } #endif + + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_end_taskgroup(loc, gtid); + } KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); } Index: openmp/trunk/runtime/test/ompt/tasks/taskloop.c =================================================================== --- openmp/trunk/runtime/test/ompt/tasks/taskloop.c +++ openmp/trunk/runtime/test/ompt/tasks/taskloop.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile && %libomp-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() { + unsigned int j, x = 0; + +#pragma omp parallel num_threads(2) +#pragma omp master +#pragma omp taskloop + for (j = 0; j < 5; j += 3) { + x++; + } + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: + // CHECK-SAME: parent_task_id={{[0-9]+}} + // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]] + // CHECK-SAME: requested_team_size=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]] + // CHECK-SAME: team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_begin: + // 
CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: count=2 + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: + // Schedule events: + // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID1]] + // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID1]], second_task_id={{[0-9]+}} + // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID2]] + // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID2]], second_task_id={{[0-9]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0 + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]], team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + + return 0; +}