Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ runtime/src/kmp_csupport.cpp @@ -556,7 +556,8 @@ if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, - ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); + ompt_parallel_invoker_program | ompt_parallel_team, + OMPT_LOAD_RETURN_ADDRESS(global_tid)); } __ompt_lw_taskteam_unlink(this_thr); this_thr->th.ompt_thread_info.state = ompt_state_overhead; Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -1225,8 +1225,8 @@ ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( &(parent_task_info->task_data), &(parent_task_info->frame), - &ompt_parallel_data, team_size, ompt_parallel_invoker_program, - codeptr); + &ompt_parallel_data, team_size, + ompt_parallel_invoker_program | ompt_parallel_team, codeptr); } } #endif // OMPT_SUPPORT @@ -1533,9 +1533,13 @@ int team_size = master_set_numthreads ? master_set_numthreads : get__nproc_2(parent_team, master_tid); + int flags = OMPT_INVOKER(call_context) | + ((microtask == (microtask_t)__kmp_teams_master) + ? ompt_parallel_league + : ompt_parallel_team); ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( - parent_task_data, ompt_frame, &ompt_parallel_data, team_size, - OMPT_INVOKER(call_context), return_address); + parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -1565,9 +1569,7 @@ // AC: we are in serialized parallel __kmpc_serialized_parallel(loc, gtid); KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); - // AC: need this in order enquiry functions work - // correctly, will restore at join time - parent_team->t.t_serialized--; + #if OMPT_SUPPORT void *dummy; void **exit_runtime_p; @@ -1585,11 +1587,12 @@ /* OMPT implicit task begin */ implicit_task_data = OMPT_CUR_TASK_DATA(master_th); if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + implicit_task_data, 1, + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); } /* OMPT state */ @@ -1598,6 +1601,9 @@ exit_runtime_p = &dummy; } #endif + // AC: need to decrement t_serialized for enquiry functions to work + // correctly, will restore at join time + parent_team->t.t_serialized--; { KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); @@ -1617,14 +1623,15 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, implicit_task_data, 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); } + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); - if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th), - OMPT_INVOKER(call_context), return_address); + &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th), + OMPT_INVOKER(call_context) | ompt_parallel_team, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -1639,6 +1646,15 @@ parent_team->t.t_level++; #if OMP_50_ENABLED parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save + +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_lw_taskteam_t lw_taskteam; + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + &ompt_parallel_data, return_address); + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true); + } +#endif #endif /* Change number of threads in the team if requested */ @@ -1797,11 +1813,13 @@ task_info = OMPT_CUR_TASK_INFO(master_th); exit_runtime_p = &(task_info->frame.exit_frame.ptr); if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + &(task_info->task_data), 1, + OMPT_CUR_TASK_INFO(master_th)->thread_num, + ompt_task_implicit); } /* OMPT state */ @@ -1829,14 +1847,16 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? + OMPT_CUR_TASK_INFO(master_th)->thread_num, + ompt_task_implicit); } - + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - OMPT_CUR_TEAM_DATA(master_th), parent_task_data, - OMPT_INVOKER(call_context), return_address); + &ompt_parallel_data, parent_task_data, + OMPT_INVOKER(call_context) | ompt_parallel_team, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -1868,6 +1888,23 @@ team->t.t_level--; // AC: call special invoker for outer "parallel" of teams construct invoker(gtid); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), 0, + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial); + } + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + &ompt_parallel_data, parent_task_data, + OMPT_INVOKER(call_context) | ompt_parallel_league, + return_address); + } + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif } else { #endif /* OMP_40_ENABLED */ argv = args; @@ -1900,7 +1937,8 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? + implicit_task_data, 1, __kmp_tid_from_gtid(gtid), + ompt_task_implicit); OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1929,7 +1967,8 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? + OMPT_CUR_TASK_INFO(master_th)->thread_num, + ompt_task_implicit); } ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); @@ -1937,7 +1976,8 @@ if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( &ompt_parallel_data, parent_task_data, - OMPT_INVOKER(call_context), return_address); + OMPT_INVOKER(call_context) | ompt_parallel_team, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -2330,12 +2370,11 @@ static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, kmp_team_t *team, ompt_data_t *parallel_data, - fork_context_e fork_context, void *codeptr) { + int flags, void *codeptr) { ompt_task_info_t *task_info = __ompt_get_task_info_object(0); if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context), - codeptr); + parallel_data, &(task_info->task_data), flags, codeptr); } task_info->frame.enter_frame = ompt_data_none; @@ -2371,6 +2410,7 @@ master_th->th.th_ident = loc; #if OMPT_SUPPORT + void *team_microtask = (void *)team->t.t_pkfn; if (ompt_enabled.enabled) { master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -2472,10 +2512,25 @@ if (master_th->th.th_teams_microtask && !exit_teams && team->t.t_pkfn != (microtask_t)__kmp_teams_master && team->t.t_level == master_th->th.th_teams_level + 1) { - // AC: We need to leave the team structure intact at the end of parallel - // inside the teams construct, so that at the next parallel same (hot) team - // works, only adjust nesting levels - +// AC: We need to leave the team structure intact at the end of parallel +// inside the teams construct, so that at the next parallel same (hot) team +// works, only adjust nesting levels +#if OMPT_SUPPORT + ompt_data_t ompt_parallel_data = ompt_data_none; + if (ompt_enabled.enabled) { + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_implicit_task) { + int ompt_team_size = team->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); + } + task_info->frame.exit_frame = ompt_data_none; + task_info->task_data = ompt_data_none; + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); + __ompt_lw_taskteam_unlink(master_th); + } +#endif /* Decrement our nested depth level */ team->t.t_level--; team->t.t_active_level--; @@ -2514,8 +2569,8 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, - codeptr); + __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data, + OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr); } #endif @@ -2548,12 +2603,14 @@ if (ompt_enabled.enabled) { ompt_task_info_t *task_info = __ompt_get_task_info_object(0); if (ompt_enabled.ompt_callback_implicit_task) { - int ompt_team_size = team->t.t_nproc; + int flags = (team_microtask == (void *)__kmp_teams_master) + ? ompt_task_initial + : ompt_task_implicit; + int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? + OMPT_CUR_TASK_INFO(master_th)->thread_num, flags); } - task_info->frame.exit_frame = ompt_data_none; task_info->task_data = ompt_data_none; } @@ -2629,8 +2686,12 @@ __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); #if OMPT_SUPPORT + int flags = + OMPT_INVOKER(fork_context) | + ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league + : ompt_parallel_team); if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags, codeptr); } #endif @@ -5809,18 +5870,15 @@ this_thr->th.ompt_thread_info.state = ompt_state_overhead; this_thr->th.ompt_thread_info.wait_id = 0; this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0); + this_thr->th.ompt_thread_info.parallel_flags = 0; if (ompt_enabled.ompt_callback_thread_begin) { ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( ompt_thread_worker, thread_data); } - } -#endif - -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { this_thr->th.ompt_thread_info.state = ompt_state_idle; } #endif + /* This is the place where threads wait for work */ while (!TCR_4(__kmp_global.g.g_done)) { KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); @@ -7176,7 +7234,7 @@ ompt_team_size = team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, - __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? + __kmp_tid_from_gtid(gtid), ompt_task_implicit); OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid); } #endif @@ -7200,6 +7258,7 @@ ); #if OMPT_SUPPORT *exit_runtime_p = NULL; + this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team; #endif #if KMP_STATS_ENABLED @@ -7279,7 +7338,22 @@ (void *)__kmp_teams_master); #endif __kmp_run_before_invoked_task(gtid, 0, this_thr, team); +#if OMPT_SUPPORT + int tid = __kmp_tid_from_gtid(gtid); + ompt_data_t *task_data = + &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data; + ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid, + ompt_task_initial); + OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid; + } +#endif __kmp_teams_master(gtid); +#if OMPT_SUPPORT + this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league; +#endif __kmp_run_after_invoked_task(gtid, 0, this_thr, team); return 1; } Index: runtime/src/kmp_wait_release.h =================================================================== --- runtime/src/kmp_wait_release.h +++ runtime/src/kmp_wait_release.h @@ -140,8 +140,11 @@ #endif if (!KMP_MASTER_TID(ds_tid)) { if (ompt_enabled.ompt_callback_implicit_task) { + int flags = this_thr->th.ompt_thread_info.parallel_flags; + flags = (flags & ompt_parallel_league) ? ompt_task_initial + : ompt_task_implicit; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit); + ompt_scope_end, NULL, tId, 0, ds_tid, flags); } // return to idle state this_thr->th.ompt_thread_info.state = ompt_state_idle; Index: runtime/src/ompt-internal.h =================================================================== --- runtime/src/ompt-internal.h +++ runtime/src/ompt-internal.h @@ -83,6 +83,7 @@ ompt_state_t state; ompt_wait_id_t wait_id; int ompt_task_yielded; + int parallel_flags; // information for the last parallel region invoked void *idle_frame; } ompt_thread_info_t; Index: runtime/src/ompt-specific.h =================================================================== --- runtime/src/ompt-specific.h +++ runtime/src/ompt-specific.h @@ -26,7 +26,7 @@ int gtid, ompt_data_t *ompt_pid, void *codeptr); void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int on_heap); + int on_heap, bool always = false); void __ompt_lw_taskteam_unlink(kmp_info_t *thr); Index: runtime/src/ompt-specific.cpp =================================================================== --- runtime/src/ompt-specific.cpp +++ runtime/src/ompt-specific.cpp @@ -269,10 +269,11 @@ } void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int on_heap) { + int on_heap, bool always) { ompt_lw_taskteam_t *link_lwt = lwt; - if (thr->th.th_team->t.t_serialized > - 1) { // we already have a team, so link the new team and swap values + if (always || + thr->th.th_team->t.t_serialized > + 1) { // we already have a team, so link the new team and swap values if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap link_lwt = (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); Index: runtime/test/ompt/callback.h =================================================================== --- runtime/test/ompt/callback.h +++ runtime/test/ompt/callback.h @@ -472,7 +472,8 @@ char buffer[2048]; format_task_type(flags, buffer); - if(parallel_data->ptr) + // Only check initial task not created by teams construct + if (team_size == 1 && thread_num == 1 && parallel_data->ptr) printf("%s\n", "0: parallel_data initially not null"); parallel_data->value = ompt_get_unique_id(); printf("%" PRIu64 ": ompt_event_initial_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32 ", index=%" PRIu32 ", flags=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num, flags); @@ -483,7 +484,12 @@ break; case ompt_scope_end: if(flags & ompt_task_initial){ - printf("%" PRIu64 ": ompt_event_initial_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); + printf("%" PRIu64 ": ompt_event_initial_task_end: parallel_id=%" PRIu64 + ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32 + ", index=%" PRIu32 "\n", + ompt_get_thread_data()->value, + (parallel_data) ? parallel_data->value : 0, task_data->value, + team_size, thread_num); } else { printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); } @@ -624,23 +630,28 @@ if(parallel_data->ptr) printf("0: parallel_data initially not null\n"); parallel_data->value = ompt_get_unique_id(); - printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 + int invoker = flag & 0xF; + const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams"; + const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams"; + printf("%" PRIu64 ": ompt_event_%s_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, " - "parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 + "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", - ompt_get_thread_data()->value, encountering_task_data->value, + ompt_get_thread_data()->value, event, encountering_task_data->value, encountering_task_frame->exit_frame.ptr, - encountering_task_frame->enter_frame.ptr, parallel_data->value, - requested_team_size, codeptr_ra, flag); + encountering_task_frame->enter_frame.ptr, parallel_data->value, size, + requested_team_size, codeptr_ra, invoker); } static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, ompt_data_t *encountering_task_data, int flag, const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 + int invoker = flag & 0xF; + const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams"; + printf("%" PRIu64 ": ompt_event_%s_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", - ompt_get_thread_data()->value, parallel_data->value, - encountering_task_data->value, flag, codeptr_ra); + ompt_get_thread_data()->value, event, parallel_data->value, + encountering_task_data->value, invoker, codeptr_ra); } static void Index: runtime/test/ompt/misc/interoperability.cpp =================================================================== --- runtime/test/ompt/misc/interoperability.cpp +++ runtime/test/ompt/misc/interoperability.cpp @@ -58,7 +58,8 @@ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}} -// CHECK-SAME: task_id=[[PARENT_TASK_ID_1:[0-9]+]], actual_parallelism=1, index=1, flags=1 +// CHECK-SAME: task_id=[[PARENT_TASK_ID_1:[0-9]+]], actual_parallelism=1, +// CHECK-SAME: index=1, flags=1 // CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_parallel_begin: // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_1]] @@ -73,7 +74,7 @@ // CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_initial_task_end: // CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[PARENT_TASK_ID_1]], -// CHECK-SAME: team_size=0, thread_num=1 +// CHECK-SAME: actual_parallelism=0, index=1 // CHECK: {{^}}[[MASTER_ID_1]]: ompt_event_thread_end: // CHECK-SAME: thread_id=[[MASTER_ID_1]] @@ -83,7 +84,8 @@ // CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID_2]] // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_initial_task_begin: parallel_id={{[0-9]+}} -// CHECK-SAME: task_id=[[PARENT_TASK_ID_2:[0-9]+]], actual_parallelism=1, index=1, flags=1 +// CHECK-SAME: task_id=[[PARENT_TASK_ID_2:[0-9]+]], actual_parallelism=1, +// CHECK-SAME: index=1, flags=1 // CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_parallel_begin: // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID_2]] @@ -99,7 +101,7 @@ // CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_initial_task_end: // CHECK-SAME: parallel_id={{[0-9]+}}, task_id=[[PARENT_TASK_ID_2]], -// CHECK-SAME: team_size=0, thread_num=1 +// CHECK-SAME: actual_parallelism=0, index=1 // CHECK: {{^}}[[MASTER_ID_2]]: ompt_event_thread_end: // CHECK-SAME: thread_id=[[MASTER_ID_2]] Index: runtime/test/ompt/teams/parallel_team.c =================================================================== --- /dev/null +++ runtime/test/ompt/teams/parallel_team.c @@ -0,0 +1,81 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" + +int main() { +#pragma omp target teams num_teams(1) thread_limit(2) +#pragma omp parallel num_threads(2) + { printf("In teams\n"); } + return 0; +} + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK-NOT: 0: parallel_data initially not null +// CHECK-NOT: 0: task_data initially not null +// CHECK-NOT: 0: thread_data initially not null + +// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK:[0-9]+]], {{.*}}, index=1 + +// CHECK: {{^}}[[MASTER]]: ompt_event_teams_begin: +// CHECK-SAME: parent_task_id=[[INIT_TASK]] +// CHECK-SAME: {{.*}} requested_num_teams=1 +// CHECK-SAME: {{.*}} invoker=[[TEAMS_FLAGS:[0-9]+]] + +// +// team 0/thread 0 +// +// initial task in the teams construct +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK_0:[0-9]+]], actual_parallelism=1, index=0 + +// parallel region forked by runtime +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[INIT_TASK_0]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0:[0-9]+]] +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[IMPL_TASK_0:[0-9]+]] + +// user parallel region +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[IMPL_TASK_0]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00:[0-9]+]] +// CHECK-SAME: {{.*}} requested_team_size=2 +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00]], task_id=[[IMPL_TASK_00:[0-9]+]] +// CHECK-SAME: {{.*}} team_size=2, thread_num=0 +// +// barrier event is here +// +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_end: +// CHECK-SAME: {{.*}} parallel_id={{[0-9]+}}, task_id=[[IMPL_TASK_00]] +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00]], task_id=[[IMPL_TASK_0]] + +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_end: +// CHECK-SAME: {{.*}} parallel_id={{[0-9]+}}, task_id=[[IMPL_TASK_0]] +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[INIT_TASK_0]] + +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK_0]], actual_parallelism=0, index=0 + +// CHECK: {{^}}[[MASTER]]: ompt_event_teams_end: +// CHECK-SAME: {{.*}} task_id=[[INIT_TASK]], invoker=[[TEAMS_FLAGS]] + +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK]], {{.*}}, index=1 + +// +// team 0/thread 1 +// +// CHECK: {{^}}[[WORKER:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00]], task_id=[[IMPL_TASK_01:[0-9]+]] +// CHECK-SAME: {{.*}} team_size=2, thread_num=1 +// +// barrier event is here +// +// CHECK: {{^}}[[WORKER]]: ompt_event_implicit_task_end: +// CHECK-SAME: {{.*}} parallel_id={{[0-9]+}}, task_id=[[IMPL_TASK_01]] Index: runtime/test/ompt/teams/serial_teams.c =================================================================== --- /dev/null +++ runtime/test/ompt/teams/serial_teams.c @@ -0,0 +1,89 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" + +int main() { +#pragma omp target teams num_teams(2) thread_limit(1) +#pragma omp parallel num_threads(1) + { printf("In teams parallel\n"); } + return 0; +} + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK-NOT: 0: parallel_data initially not null +// CHECK-NOT: 0: task_data initially not null +// CHECK-NOT: 0: thread_data initially not null + +// CHECK: {{^}}[[MASTER_0:[0-9]+]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK:[0-9]+]], {{.*}}, index=1 + +// CHECK: {{^}}[[MASTER_0]]: ompt_event_teams_begin: +// CHECK-SAME: parent_task_id=[[INIT_TASK]] +// CHECK-SAME: {{.*}} requested_num_teams=2 +// CHECK-SAME: {{.*}} invoker=[[TEAMS_FLAGS:[0-9]+]] + +// +// team 0 +// +// initial task in the teams construct +// CHECK: {{^}}[[MASTER_0]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK_0:[0-9]+]], actual_parallelism=2, index=0 + +// parallel region forked by runtime +// CHECK: {{^}}[[MASTER_0]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[INIT_TASK_0]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0:[0-9]+]] +// CHECK: {{^}}[[MASTER_0]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[IMPL_TASK_0:[0-9]+]] + +// user parallel region +// CHECK: {{^}}[[MASTER_0]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[IMPL_TASK_0]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00:[0-9]+]] +// CHECK: {{^}}[[MASTER_0]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00]], task_id=[[IMPL_TASK_0]] + +// CHECK: {{^}}[[MASTER_0]]: ompt_event_implicit_task_end: +// CHECK-SAME: {{.*}} parallel_id={{[0-9]+}}, task_id=[[IMPL_TASK_0]] +// CHECK: {{^}}[[MASTER_0]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[INIT_TASK_0]] + +// CHECK: {{^}}[[MASTER_0]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK_0]], actual_parallelism=0, index=0 + +// CHECK: {{^}}[[MASTER_0]]: ompt_event_teams_end: +// CHECK-SAME: {{.*}} task_id=[[INIT_TASK]], invoker=[[TEAMS_FLAGS]] + +// CHECK: {{^}}[[MASTER_0]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK]], {{.*}}, index=1 + +// +// team 1 +// +// initial task in the teams construct +// CHECK: {{^}}[[MASTER_1:[0-9]+]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK_1:[0-9]+]], actual_parallelism=2, index=1 + +// parallel region forked by runtime +// CHECK: {{^}}[[MASTER_1]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[INIT_TASK_1]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_ID_1:[0-9]+]] +// CHECK: {{^}}[[MASTER_1]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_ID_1]], task_id=[[IMPL_TASK_1:[0-9]+]] + +// user parallel region +// CHECK: {{^}}[[MASTER_1]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[IMPL_TASK_1]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_ID_11:[0-9]+]] +// CHECK: {{^}}[[MASTER_1]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_ID_11]], task_id=[[IMPL_TASK_1]] + +// CHECK: {{^}}[[MASTER_1]]: ompt_event_implicit_task_end: +// CHECK-SAME: {{.*}} parallel_id={{[0-9]+}}, task_id=[[IMPL_TASK_1]] +// CHECK: {{^}}[[MASTER_1]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_ID_1]], task_id=[[INIT_TASK_1]] + +// CHECK: {{^}}[[MASTER_1]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK_1]], actual_parallelism=0, index=1 Index: runtime/test/ompt/teams/serialized.c =================================================================== --- /dev/null +++ runtime/test/ompt/teams/serialized.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" + +int main() { +#pragma omp target teams num_teams(1) thread_limit(1) +#pragma omp parallel num_threads(1) + { printf("In teams\n"); } + return 0; +} + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK-NOT: 0: parallel_data initially not null +// CHECK-NOT: 0: task_data initially not null +// CHECK-NOT: 0: thread_data initially not null + +// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK:[0-9]+]], {{.*}}, index=1 + +// CHECK: {{^}}[[MASTER]]: ompt_event_teams_begin: +// CHECK-SAME: parent_task_id=[[INIT_TASK]] +// CHECK-SAME: {{.*}} requested_num_teams=1 +// CHECK-SAME: {{.*}} invoker=[[TEAMS_FLAGS:[0-9]+]] + +// initial task in the teams construct starts +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK_0:[0-9]+]], actual_parallelism=1, index=0 + +// parallel region forked by runtime +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[INIT_TASK_0]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0:[0-9]+]] +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[IMPL_TASK_0:[0-9]+]] + +// user parallel region +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[IMPL_TASK_0]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00:[0-9]+]] +// CHECK-SAME: {{.*}} requested_team_size=1 +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00]], task_id=[[IMPL_TASK_00:[0-9]+]] +// CHECK-SAME: {{.*}} team_size=1, thread_num=0 +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_end: +// CHECK-SAME: {{.*}} parallel_id={{[0-9]+}}, task_id=[[IMPL_TASK_00]] +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_00]], task_id=[[IMPL_TASK_0]] + +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[INIT_TASK_0]] + +// initial task in the teams construct ends +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK_0]], actual_parallelism=0, index=0 + +// CHECK: {{^}}[[MASTER]]: ompt_event_teams_end: +// CHECK-SAME: {{.*}} task_id=[[INIT_TASK]], invoker=[[TEAMS_FLAGS]] + +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK]], {{.*}}, index=1 Index: runtime/test/ompt/teams/team.c =================================================================== --- /dev/null +++ runtime/test/ompt/teams/team.c @@ -0,0 +1,49 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" + +int main() { +#pragma omp target teams num_teams(1) thread_limit(1) + { printf("In teams\n"); } + return 0; +} + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK-NOT: 0: parallel_data initially not null +// CHECK-NOT: 0: task_data initially not null +// CHECK-NOT: 0: thread_data initially not null + +// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK:[0-9]+]], {{.*}}, index=1 + +// CHECK: {{^}}[[MASTER]]: ompt_event_teams_begin: +// CHECK-SAME: parent_task_id=[[INIT_TASK]] +// CHECK-SAME: {{.*}} requested_num_teams=1 +// CHECK-SAME: {{.*}} invoker=[[TEAMS_FLAGS:[0-9]+]] + +// initial task in the teams construct starts +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_begin: +// CHECK-SAME: task_id=[[INIT_TASK_0:[0-9]+]], actual_parallelism=1, index=0 + +// parallel region forked by runtime +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_begin: +// CHECK-SAME: {{.*}} parent_task_id=[[INIT_TASK_0]] +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0:[0-9]+]] +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_begin: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[IMPL_TASK_0:[0-9]+]] +// CHECK: {{^}}[[MASTER]]: ompt_event_implicit_task_end: +// CHECK-SAME: {{.*}} parallel_id={{[0-9]+}}, task_id=[[IMPL_TASK_0]] +// CHECK: {{^}}[[MASTER]]: ompt_event_parallel_end: +// CHECK-SAME: {{.*}} parallel_id=[[PAR_0]], task_id=[[INIT_TASK_0]] + +// initial task in the teams construct ends +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK_0]], actual_parallelism=0, index=0 + +// CHECK: {{^}}[[MASTER]]: ompt_event_teams_end: +// CHECK-SAME: {{.*}} task_id=[[INIT_TASK]], invoker=[[TEAMS_FLAGS]] + +// CHECK: {{^}}[[MASTER]]: ompt_event_initial_task_end: +// CHECK-SAME: task_id=[[INIT_TASK]], {{.*}}, index=1