Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -3890,11 +3890,11 @@ ompt_thread_initial, __ompt_get_thread_data_internal()); } ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); - if (ompt_enabled.ompt_callback_task_create) { - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - NULL, NULL, task_data, ompt_task_initial, 0, NULL); - // initial task has nothing to return to + ompt_data_t *parallel_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial); } ompt_set_thread_state(root_thread, ompt_state_work_serial); @@ -3984,6 +3984,13 @@ #endif /* KMP_OS_WINDOWS */ #if OMPT_SUPPORT + ompt_data_t *task_data; + ompt_data_t *parallel_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, parallel_data, task_data, 1, 1, ompt_task_initial); + } if (ompt_enabled.ompt_callback_thread_end) { ompt_callbacks.ompt_callback(ompt_callback_thread_end)( &(root->r.r_uber_thread->th.ompt_thread_info.thread_data)); Index: runtime/src/ompt-general.cpp =================================================================== --- runtime/src/ompt-general.cpp +++ runtime/src/ompt-general.cpp @@ -361,10 +361,11 @@ ompt_thread_initial, __ompt_get_thread_data_internal()); } ompt_data_t *task_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); - if (ompt_enabled.ompt_callback_task_create) { - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - NULL, NULL, task_data, ompt_task_initial, 0, NULL); + ompt_data_t *parallel_data; + 
__ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial); } ompt_set_thread_state(root_thread, ompt_state_work_serial); Index: runtime/test/ompt/callback.h =================================================================== --- runtime/test/ompt/callback.h +++ runtime/test/ompt/callback.h @@ -451,10 +451,29 @@ if(task_data->ptr) printf("%s\n", "0: task_data initially not null"); task_data->value = ompt_get_unique_id(); - printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num); + + //there is no parallel_begin callback for implicit parallel region + //thus it is initialized in initial task + if(flags & ompt_task_initial) + { + char buffer[2048]; + + format_task_type(flags, buffer); + if(parallel_data->ptr) + printf("%s\n", "0: parallel_data initially not null"); + parallel_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, 0 , NULL, NULL, task_data->value, NULL, buffer, flags, "no"); + } else { + printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num); + } + break; case ompt_scope_end: - printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, 
(parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); + if(flags & ompt_task_initial){ + printf("%" PRIu64 ": ompt_event_initial_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); + } else { + printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); + } break; } } @@ -627,17 +646,6 @@ format_task_type(type, buffer); - //there is no parallel_begin callback for implicit parallel region - //thus it is initialized in initial task - if(type & ompt_task_initial) - { - ompt_data_t *parallel_data; - ompt_get_parallel_info(0, &parallel_data, NULL); - if(parallel_data->ptr) - printf("%s\n", "0: parallel_data initially not null"); - parallel_data->value = ompt_get_unique_id(); - } - printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL, encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? 
"yes" : "no"); } Index: runtime/test/ompt/misc/interoperability.cpp =================================================================== --- runtime/test/ompt/misc/interoperability.cpp +++ runtime/test/ompt/misc/interoperability.cpp @@ -75,6 +75,10 @@ // CHECK-SAME: parallel_id=[[PARALLEL_ID_1]], task_id=[[PARENT_TASK_ID_1]] // CHECK-SAME: invoker={{[0-9]+}} +// CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_initial_task_end: +// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[PARENT_TASK_ID_1]], +// CHECK-SAME: team_size=1, thread_num=1 + // CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_thread_end: // CHECK-SAME: thread_id=[[MASTER_ID_1]] @@ -101,6 +105,10 @@ // CHECK-SAME: parallel_id=[[PARALLEL_ID_2]], task_id=[[PARENT_TASK_ID_2]] // CHECK-SAME: invoker={{[0-9]+}} +// CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_initial_task_end: +// CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[PARENT_TASK_ID_2]], +// CHECK-SAME: team_size=1, thread_num=1 + // CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_thread_end: // CHECK-SAME: thread_id=[[MASTER_ID_2]]