Index: runtime/src/kmp_tasking.cpp =================================================================== --- runtime/src/kmp_tasking.cpp +++ runtime/src/kmp_tasking.cpp @@ -1512,6 +1512,21 @@ bool serialize_immediate) { kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); +#if OMPT_SUPPORT + kmp_taskdata_t *parent; + if (UNLIKELY(ompt_enabled.enabled)) { + parent = new_taskdata->td_parent; + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? &(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0, + OMPT_GET_RETURN_ADDRESS(0)); + } + } +#endif + /* Should we execute the new task or queue it? For now, let's just always try to queue it. If the queue fills up, then we'll execute it. */ #if OMP_45_ENABLED @@ -2870,7 +2885,7 @@ __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); } #endif // BUILD_TIED_TASK_STACK - // Install the new data and free the old data + // Install the new data and free the old data (*threads_data_p) = new_data; __kmp_free(old_data); } else { @@ -3942,23 +3957,24 @@ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); KMP_DEBUG_ASSERT(task != NULL); + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_taskgroup(loc, gtid); + } + #if OMPT_SUPPORT && OMPT_OPTIONAL ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + kmp_uint64 iteration_count = *lb < *ub ? 
*ub - *lb + 1 : *lb - *ub + 1; if (ompt_enabled.ompt_callback_work) { ompt_callbacks.ompt_callback(ompt_callback_work)( ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); + &(task_info->task_data), iteration_count, OMPT_GET_RETURN_ADDRESS(0)); } #endif - if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_taskgroup(loc, gtid); - } - // ========================================================================= // calculate loop parameters kmp_taskloop_bounds_t task_bounds(task, lb, ub); @@ -4066,19 +4082,20 @@ grainsize, extras, tc, task_dup); } - if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_end_taskgroup(loc, gtid); - } #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_work) { ompt_callbacks.ompt_callback(ompt_callback_work)( ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), - &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); + &(task_info->task_data), iteration_count, OMPT_GET_RETURN_ADDRESS(0)); } #endif + + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_end_taskgroup(loc, gtid); + } KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); } Index: runtime/test/ompt/worksharing/taskloop.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/taskloop.c @@ -0,0 +1,59 @@ +// RUN: %libomp-compile && %libomp-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() { + unsigned int i, j, x; + +#pragma omp parallel num_threads(2) +#pragma omp master +#pragma omp taskloop + for (j = 0; j < 2; j++) { + x++; + } + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: + // CHECK-SAME: parent_task_id={{[0-9]+}} + // CHECK-SAME: 
parallel_id=[[PARALLEL_ID:[0-9]+]] + // CHECK-SAME: requested_team_size=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]] + // CHECK-SAME: team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, count=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: count=2 + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: + // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID1]] + // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID1]], second_task_id={{[0-9]+}} + // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID2]] + // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID2]], second_task_id={{[0-9]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0 + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]], 
team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + + return 0; +}