Index: runtime/src/kmp_tasking.cpp =================================================================== --- runtime/src/kmp_tasking.cpp +++ runtime/src/kmp_tasking.cpp @@ -2870,7 +2870,7 @@ __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); } #endif // BUILD_TIED_TASK_STACK - // Install the new data and free the old data + // Install the new data and free the old data (*threads_data_p) = new_data; __kmp_free(old_data); } else { @@ -3751,7 +3751,7 @@ gtid, i, next_task, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds.get_upper_offset())); - __kmp_omp_task(gtid, next_task, true); // schedule new task + __kmpc_omp_task(NULL, gtid, next_task); // schedule new task lower = upper + st; // adjust lower bound for the next iteration } // free the pattern task and exit @@ -3907,7 +3907,7 @@ p->extras = ext1; p->tc = tc1; p->num_t_min = num_t_min; - __kmp_omp_task(gtid, new_task, true); // schedule new task + __kmpc_omp_task(NULL, gtid, new_task); // schedule new task // execute the 1st half of current subrange if (n_tsk0 > num_t_min) @@ -3942,23 +3942,24 @@ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); KMP_DEBUG_ASSERT(task != NULL); + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_taskgroup(loc, gtid); + } + #if OMPT_SUPPORT && OMPT_OPTIONAL ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + kmp_uint64 iteration_count = *lb < *ub ? *ub - *lb + 1 : *lb - *ub + 1; if (ompt_enabled.ompt_callback_work) { ompt_callbacks.ompt_callback(ompt_callback_work)( ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); + &(task_info->task_data), iteration_count, OMPT_GET_RETURN_ADDRESS(0)); } #endif - if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_taskgroup(loc, gtid); - } - // ========================================================================= // calculate loop parameters kmp_taskloop_bounds_t task_bounds(task, lb, ub); @@ -4066,19 +4067,20 @@ grainsize, extras, tc, task_dup); } - if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmpc_end_taskgroup(loc, gtid); - } #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_work) { ompt_callbacks.ompt_callback(ompt_callback_work)( ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), - &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); + &(task_info->task_data), iteration_count, OMPT_GET_RETURN_ADDRESS(0)); } #endif + + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_end_taskgroup(loc, gtid); + } KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); } Index: runtime/test/ompt/worksharing/taskloop.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/taskloop.c @@ -0,0 +1,59 @@ +// RUN: %libomp-compile && %libomp-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() { + unsigned int i, j, x; + +#pragma omp parallel num_threads(2) +#pragma omp master +#pragma omp taskloop + for (j = 0; j < 2; j++) { + x++; + } + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: + // CHECK-SAME: parent_task_id={{[0-9]+}} + // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]] + // CHECK-SAME: requested_team_size=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]] + // CHECK-SAME: team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, count=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]] + // CHECK-SAME: task_type=ompt_task_explicit=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: count=2 + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: + // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID1]] + // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID1]], second_task_id={{[0-9]+}} + // CHECK-DAG: {{^.*}}first_task_id={{[0-9]+}}, second_task_id=[[TASK_ID2]] + // CHECK-DAG: {{^.*}}first_task_id=[[TASK_ID2]], second_task_id={{[0-9]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0 + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1]], team_size=2, thread_num=0 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + + return 0; +}