diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -284,20 +284,7 @@ va_start(ap, microtask); #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - kmp_info_t *master_th = __kmp_threads[gtid]; - kmp_team_t *parent_team = master_th->th.th_team; - ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; - if (lwt) - ompt_frame = &(lwt->ompt_task_info.frame); - else { - int tid = __kmp_tid_from_gtid(gtid); - ompt_frame = &( - parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); - } - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif @@ -384,12 +371,7 @@ this_thr->th.th_team->t.t_level; // AC: can be >0 on host #if OMPT_SUPPORT - kmp_team_t *parent_team = this_thr->th.th_team; - int tid = __kmp_tid_from_gtid(gtid); - if (ompt_enabled.enabled) { - parent_team->t.t_implicit_task_taskdata[tid] - .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif @@ -468,9 +450,13 @@ // each case. __kmp_assert_valid_gtid(global_tid); #if OMPT_SUPPORT + OMPT_BEGIN_CUR_RUNTIME_ENTER_ADDRESS(global_tid); OMPT_STORE_RETURN_ADDRESS(global_tid); #endif __kmp_serialized_parallel(loc, global_tid); +#if OMPT_SUPPORT + OMPT_BEGIN_CUR_RUNTIME_EXIT_ADDRESS(global_tid); +#endif } /*! @@ -627,9 +613,13 @@ } void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { #if OMPT_SUPPORT + OMPT_END_RUNTIME_EXIT_ADDRESS(global_tid); OMPT_STORE_RETURN_ADDRESS(global_tid); #endif __kmp_end_serialized_parallel_impl(loc, global_tid); +#if OMPT_SUPPORT + OMPT_END_RUNTIME_ENTER_ADDRESS(global_tid); +#endif } /*! @@ -734,20 +724,10 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { __kmp_assert_valid_gtid(global_tid); #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); OMPT_STORE_RETURN_ADDRESS(global_tid); #endif __kmp_barrier_impl(loc, global_tid); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif } /* The BARRIER for a MASTER section is always explicit */ @@ -1668,23 +1648,13 @@ __kmp_check_barrier(global_tid, ct_barrier, loc); #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif return (status != 0) ? 0 : 1; } @@ -1732,23 +1702,13 @@ } #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif ret = __kmpc_master(loc, global_tid); @@ -2130,12 +2090,7 @@ *data_ptr = cpy_data; #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif /* This barrier is not a barrier region boundary */ @@ -3515,12 +3470,7 @@ // JP: as long as there is a barrier in the implementation, OMPT should and // will provide the barrier events // so we set-up the necessary frame/return addresses. - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3529,11 +3479,6 @@ __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), global_tid, FALSE, reduce_size, reduce_data, reduce_func); retval = (retval != 0) ? (0) : (1); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif // all other workers except master should do this pop here // ( none of other workers will get to __kmpc_end_reduce_nowait() ) @@ -3709,12 +3654,7 @@ // this barrier should be visible to a customer and to the threading profile // tool (it's a terminating barrier on constructs if NOWAIT not specified) #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = @@ -3724,11 +3664,6 @@ __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), global_tid, TRUE, reduce_size, reduce_data, reduce_func); retval = (retval != 0) ? (0) : (1); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif // all other workers except master should do this pop here // ( none of other workers except master will enter __kmpc_end_reduce() ) @@ -3793,22 +3728,12 @@ // TODO: implicit barrier: should be exposed #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif } else if (packed_reduction_method == empty_reduce_block) { @@ -3818,47 +3743,30 @@ // TODO: implicit barrier: should be exposed #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif } else if (packed_reduction_method == atomic_reduce_block) { #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); #endif // TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif } else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) { +#if OMPT_SUPPORT + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(global_tid); +#endif // only master executes here (master releases all other workers) __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), global_tid); diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -98,19 +98,10 @@ MKLOC(loc, "GOMP_barrier"); KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); #if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_aux_barrier(&loc, gtid); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif } // Mutual exclusion @@ -246,11 +237,7 @@ // and for all other threads to reach this point. #if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); @@ -259,11 +246,6 @@ // threads to do likewise, then return. retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif return retval; } @@ -277,20 +259,11 @@ // propagated to all threads before trying to reuse the t_copypriv_data field. __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; #if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif } void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) { @@ -342,7 +315,6 @@ void *data) { #if OMPT_SUPPORT kmp_info_t *thr; - ompt_frame_t *ompt_frame; ompt_state_t enclosing_state; if (ompt_enabled.enabled) { @@ -353,18 +325,16 @@ enclosing_state = thr->th.ompt_thread_info.state; thr->th.ompt_thread_info.state = ompt_state_work_parallel; - // set task frame - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } +#if OMPT_SUPPORT + OMPT_STORE_CUR_RUNTIME_EXIT_ADDRESS(*gtid); +#endif #endif task(data); #if OMPT_SUPPORT if (ompt_enabled.enabled) { - // clear task frame - ompt_frame->exit_frame = ompt_data_none; // restore enclosing state thr->th.ompt_thread_info.state = enclosing_state; @@ -392,7 +362,6 @@ #if OMPT_SUPPORT kmp_info_t *thr; - ompt_frame_t *ompt_frame; ompt_state_t enclosing_state; if (ompt_enabled.enabled) { @@ -401,10 +370,8 @@ enclosing_state = thr->th.ompt_thread_info.state; thr->th.ompt_thread_info.state = ompt_state_work_parallel; - // set task frame - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } + OMPT_STORE_CUR_RUNTIME_EXIT_ADDRESS(*gtid); #endif // Now invoke the microtask. @@ -412,9 +379,6 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { - // clear task frame - ompt_frame->exit_frame = ompt_data_none; - // reset enclosing state thr->th.ompt_thread_info.state = enclosing_state; } @@ -470,12 +434,7 @@ int gtid = __kmp_entry_gtid(); #if OMPT_SUPPORT - ompt_frame_t *parent_frame, *frame; - - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); - parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_BEGIN_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif @@ -485,10 +444,7 @@ (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); #if OMPT_SUPPORT - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); - frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_BEGIN_CUR_RUNTIME_EXIT_ADDRESS(gtid); #endif } @@ -506,20 +462,17 @@ thr->th.th_team); } #if OMPT_SUPPORT - if (ompt_enabled.enabled) { - // Implicit task is finished here, in the barrier we might schedule - // deferred tasks, - // these don't see the implicit task on the stack - OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none; - } + OMPT_END_RUNTIME_EXIT_ADDRESS(gtid); #endif - __kmp_join_call(&loc, gtid #if OMPT_SUPPORT , fork_context_gnu #endif ); +#if OMPT_SUPPORT + OMPT_END_RUNTIME_ENTER_ADDRESS(gtid); +#endif } // Loop worksharing constructs @@ -810,19 +763,10 @@ KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) #if OMPT_SUPPORT && OMPT_OPTIONAL - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) } @@ -1132,17 +1076,9 @@ #if OMPT_SUPPORT && OMPT_OPTIONAL -#define OMPT_LOOP_PRE() \ - ompt_frame_t *parent_frame; \ - if (ompt_enabled.enabled) { \ - __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ - parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \ - } +#define OMPT_LOOP_PRE() OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); -#define OMPT_LOOP_POST() \ - if (ompt_enabled.enabled) { \ - parent_frame->enter_frame = ompt_data_none; \ - } +#define OMPT_LOOP_POST() #else @@ -1213,11 +1149,7 @@ } #if OMPT_SUPPORT - kmp_taskdata_t *current_task; - if (ompt_enabled.enabled) { - current_task = __kmp_threads[gtid]->th.th_current_task; - current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); #endif if (if_cond) { @@ -1247,8 +1179,6 @@ oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; thread->th.ompt_thread_info.state = ompt_state_work_parallel; - // taskdata->ompt_task_info.frame.exit_frame.ptr = - // OMPT_GET_FRAME_ADDRESS(0); } #endif if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) { @@ -1266,7 +1196,7 @@ #if OMPT_SUPPORT if (UNLIKELY(ompt_enabled.enabled)) { - __kmp_omp_task_begin_if0_ompt(&loc, gtid, task, OMPT_GET_FRAME_ADDRESS(1), + __kmp_omp_task_begin_if0_ompt(&loc, gtid, task, NULL, OMPT_GET_FRAME_ADDRESS(0), OMPT_GET_RETURN_ADDRESS(0)); } else @@ -1285,15 +1215,9 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { thread->th.ompt_thread_info = oldInfo; - taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; } #endif } -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); } @@ -1377,12 +1301,7 @@ int gtid = __kmp_entry_gtid(); #if OMPT_SUPPORT - ompt_frame_t *parent_frame; - - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); - parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif @@ -1394,12 +1313,6 @@ task, data, num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - parent_frame->enter_frame = ompt_data_none; - } -#endif - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid)); @@ -1410,19 +1323,10 @@ KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) #if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) } @@ -1446,30 +1350,17 @@ KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid)); #if OMPT_SUPPORT - ompt_task_info_t *parent_task_info, *task_info; - if (ompt_enabled.enabled) { - parent_task_info = __ompt_get_task_info_object(0); - parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); #if OMPT_SUPPORT - if (ompt_enabled.enabled) { - task_info = __ompt_get_task_info_object(0); - task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } + OMPT_STORE_CUR_RUNTIME_EXIT_ADDRESS(gtid); #endif task(data); KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - task_info->frame.exit_frame = ompt_data_none; - parent_task_info->frame.enter_frame = ompt_data_none; - } -#endif } void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), @@ -1832,6 +1723,7 @@ int priority, long start, long end, long step) { #if OMPT_SUPPORT int gtid = __kmp_entry_gtid(); + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __GOMP_taskloop(func, data, copy_func, arg_size, arg_align, gomp_flags, @@ -1843,6 +1735,11 @@ long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks, int priority, unsigned long long start, unsigned long long end, unsigned long long step) { +#if OMPT_SUPPORT + int gtid = __kmp_entry_gtid(); + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __GOMP_taskloop(func, data, copy_func, arg_size, arg_align, gomp_flags, num_tasks, priority, start, end, step); @@ -1903,6 +1800,7 @@ for (kmp_int32 i = 0; i < ndeps; i++) dep_list[i] = gomp_depends.get_kmp_depend(i); #if OMPT_SUPPORT + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_aux_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL); diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1193,7 +1193,6 @@ ompt_task_info_t *parent_task_info; parent_task_info = OMPT_CUR_TASK_INFO(this_thr); - parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); if (ompt_enabled.ompt_callback_parallel_begin) { int team_size = 1; @@ -1366,8 +1365,6 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled && this_thr->th.ompt_thread_info.state != ompt_state_overhead) { - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - ompt_lw_taskteam_t lw_taskteam; __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, &ompt_parallel_data, codeptr); @@ -1387,7 +1384,6 @@ /* OMPT state */ this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif } @@ -1526,7 +1522,7 @@ } #if OMPT_SUPPORT - void *dummy; + OmptExitAddressGuard ExitAddressGuard; void **exit_frame_p; ompt_lw_taskteam_t lw_taskteam; @@ -1534,7 +1530,6 @@ if (ompt_enabled.enabled) { __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data, return_address); - exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); // don't use lw_taskteam after linking. content was swaped @@ -1552,9 +1547,9 @@ /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_frame_p = &dummy; } + exit_frame_p = + ExitAddressGuard.getFramePointer(master_th, ompt_frame_runtime); #endif // AC: need to decrement t_serialized for enquiry functions to work // correctly, will restore at join time @@ -1573,8 +1568,6 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { - *exit_frame_p = NULL; - OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, implicit_task_data, 1, @@ -1756,7 +1749,7 @@ // Get args from parent team for teams construct #if OMPT_SUPPORT - void *dummy; + OmptExitAddressGuard ExitAddressGuard; void **exit_frame_p; ompt_task_info_t *task_info; @@ -1770,7 +1763,6 @@ // don't use lw_taskteam after linking. content was swaped task_info = OMPT_CUR_TASK_INFO(master_th); - exit_frame_p = &(task_info->frame.exit_frame.ptr); if (ompt_enabled.ompt_callback_implicit_task) { OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); @@ -1783,9 +1775,9 @@ /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_frame_p = &dummy; } + exit_frame_p = + ExitAddressGuard.getFramePointer(master_th, ompt_frame_runtime); #endif { @@ -1802,7 +1794,6 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { - *exit_frame_p = NULL; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, @@ -1866,7 +1857,7 @@ KMP_MB(); #if OMPT_SUPPORT - void *dummy; + OmptExitAddressGuard ExitAddressGuard; void **exit_frame_p; ompt_task_info_t *task_info; @@ -1878,7 +1869,6 @@ __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); // don't use lw_taskteam after linking. content was swaped task_info = OMPT_CUR_TASK_INFO(master_th); - exit_frame_p = &(task_info->frame.exit_frame.ptr); /* OMPT implicit task begin */ implicit_task_data = OMPT_CUR_TASK_DATA(master_th); @@ -1893,9 +1883,9 @@ /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_frame_p = &dummy; } + exit_frame_p = + ExitAddressGuard.getFramePointer(master_th, ompt_frame_runtime); #endif { @@ -1911,7 +1901,6 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { - *exit_frame_p = NULL; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, @@ -1937,7 +1926,6 @@ __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, return_address); - lwt.ompt_task_info.frame.exit_frame = ompt_data_none; __ompt_lw_taskteam_link(&lwt, master_th, 1); // don't use lw_taskteam after linking. content was swaped #endif @@ -2288,7 +2276,6 @@ parallel_data, &(task_info->task_data), flags, codeptr); } - task_info->frame.enter_frame = ompt_data_none; __kmp_join_restore_state(thread, team); } #endif @@ -2424,7 +2411,6 @@ ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); } - task_info->frame.exit_frame = ompt_data_none; task_info->task_data = ompt_data_none; ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); @@ -2506,7 +2492,6 @@ ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, OMPT_CUR_TASK_INFO(master_th)->thread_num, flags); } - task_info->frame.exit_frame = ompt_data_none; task_info->task_data = ompt_data_none; } #endif @@ -5785,7 +5770,6 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { /* no frame set while outside task */ - __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none; this_thr->th.ompt_thread_info.state = ompt_state_overhead; } @@ -7111,6 +7095,13 @@ __kmp_finish_implicit_task(this_thr); } +extern "C" void __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, + void (*task)(void *), void *data); +extern "C" void __kmp_GOMP_parallel_microtask_wrapper( + int *gtid, int *npr, void (*task)(void *), void *data, unsigned num_threads, + ident_t *loc, enum sched_type schedule, long start, long end, long incr, + long chunk_size); + int __kmp_invoke_task_func(int gtid) { int rc; int tid = __kmp_tid_from_gtid(gtid); @@ -7130,17 +7121,20 @@ #endif #if OMPT_SUPPORT - void *dummy; - void **exit_frame_p; ompt_data_t *my_task_data; ompt_data_t *my_parallel_data; int ompt_team_size; - if (ompt_enabled.enabled) { - exit_frame_p = &( - team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr); + OmptExitAddressGuard ExitAddressGuard; + void **exit_frame_p; + microtask_t microtask = (microtask_t)TCR_SYNC_PTR(team->t.t_pkfn); + + if (microtask != (microtask_t)__kmp_GOMP_microtask_wrapper && + microtask != (microtask_t)__kmp_GOMP_parallel_microtask_wrapper) { + exit_frame_p = + ExitAddressGuard.getFramePointer(this_thr, ompt_frame_runtime); } else { - exit_frame_p = &dummy; + exit_frame_p = ExitAddressGuard.getDummyPointer(ompt_frame_runtime); } my_task_data = @@ -7173,7 +7167,6 @@ #endif ); #if OMPT_SUPPORT - *exit_frame_p = NULL; this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team; #endif diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -519,9 +519,6 @@ #if OMPT_SUPPORT if (ompt_enabled.enabled) { - if (!current_task->ompt_task_info.frame.enter_frame.ptr) - current_task->ompt_task_info.frame.enter_frame.ptr = - OMPT_GET_FRAME_ADDRESS(0); if (ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; ompt_callbacks.ompt_callback(ompt_callback_task_create)( @@ -531,8 +528,6 @@ ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1, OMPT_LOAD_RETURN_ADDRESS(gtid)); } - - new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #if OMPT_OPTIONAL @@ -608,11 +603,6 @@ "dependencies: " "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, new_taskdata)); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif return TASK_CURRENT_NOT_QUEUED; } } else { @@ -628,11 +618,6 @@ gtid, loc_ref, new_taskdata)); kmp_int32 ret = __kmp_omp_task(gtid, new_task, true); -#if OMPT_SUPPORT - if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif return ret; } kmp_int32 __kmp_aux_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, @@ -649,6 +634,7 @@ kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) { #if OMPT_SUPPORT + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif return __kmp_omp_task_with_deps_impl(loc_ref, gtid, new_task, ndeps, dep_list, @@ -667,7 +653,6 @@ taskwait_task_data, ompt_task_complete, current_task ? &(current_task->ompt_task_info.task_data) : &task_data); } - current_task->ompt_task_info.frame.enter_frame.ptr = NULL; *taskwait_task_data = ompt_data_none; } #endif /* OMPT_SUPPORT */ @@ -708,9 +693,6 @@ ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data; KMP_ASSERT(taskwait_task_data->ptr == NULL); if (ompt_enabled.enabled) { - if (!current_task->ompt_task_info.frame.enter_frame.ptr) - current_task->ompt_task_info.frame.enter_frame.ptr = - OMPT_GET_FRAME_ADDRESS(0); if (ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; ompt_callbacks.ompt_callback(ompt_callback_task_create)( @@ -829,6 +811,7 @@ kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) { #if OMPT_SUPPORT + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); OMPT_STORE_RETURN_ADDRESS(gtid); #endif return __kmp_omp_wait_deps_impl(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -560,6 +560,9 @@ kmp_taskdata_t *current_task, kmp_int32 gtid) { kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + // untied undeferred task + if (current_task == taskdata) + current_task = taskdata->td_parent; ompt_task_status_t status = ompt_task_switch; if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) { status = ompt_task_yield; @@ -623,10 +626,10 @@ #if OMPT_SUPPORT if (ompt) { - current_task->ompt_task_info.frame.enter_frame.ptr = enter_frame_address; - taskdata->ompt_task_info.frame.exit_frame.ptr = exit_frame_address; - current_task->ompt_task_info.frame.enter_frame_flags = - ompt_frame_application | ompt_frame_framepointer; + if (enter_frame_address) + OMPT_BEGIN_TASK_RUNTIME_ENTER_ADDRESS(current_task, enter_frame_address); + if (exit_frame_address) + OMPT_BEGIN_TASK_RUNTIME_EXIT_ADDRESS(taskdata, exit_frame_address); taskdata->ompt_task_info.frame.exit_frame_flags = ((exit_frame_address == enter_frame_address) ? ompt_frame_application : ompt_frame_runtime) | @@ -816,6 +819,14 @@ thread->th.th_task_team; // might be NULL for serial teams... kmp_int32 children = 0; +#if OMPT_SUPPORT + if (ompt) { + // if (resumed_task!=taskdata) + OMPT_END_TASK_RUNTIME_ENTER_ADDRESS(resumed_task ? resumed_task + : taskdata->td_parent); + OMPT_END_TASK_RUNTIME_EXIT_ADDRESS(taskdata); + } +#endif KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming " "task %p\n", gtid, taskdata, resumed_task)); @@ -957,6 +968,7 @@ // Restore th_current_task first as suggested by John: // johnmc: if an asynchronous inquiry peers into the runtime system // it doesn't see the freed task as the current task. + thread->th.th_current_task = resumed_task; if (!detach) __kmp_free_task_and_ancestors(gtid, taskdata, thread); @@ -984,15 +996,6 @@ KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); -#if OMPT_SUPPORT - if (ompt) { - ompt_frame_t *ompt_frame; - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = ompt_data_none; - ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer; - } -#endif - return; } @@ -1467,8 +1470,10 @@ thread->th.ompt_thread_info.state = (thread->th.th_team_serialized) ? ompt_state_work_serial : ompt_state_work_parallel; - taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } + OMPT_STORE_CUR_RUNTIME_EXIT_ADDRESS_IF( + taskdata, + taskdata->td_flags.tiedness != TASK_UNTIED || taskdata != current_task); #endif // Proxy tasks are not handled by the runtime @@ -1618,6 +1623,7 @@ loc_ref, new_taskdata)); #if OMPT_SUPPORT + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); kmp_taskdata_t *parent; if (UNLIKELY(ompt_enabled.enabled)) { parent = new_taskdata->td_parent; @@ -1649,11 +1655,6 @@ gtid, loc_ref, new_taskdata)); ANNOTATE_HAPPENS_BEFORE(new_task); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { - parent->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif return TASK_CURRENT_NOT_QUEUED; } @@ -1717,9 +1718,6 @@ void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); if (!new_taskdata->td_flags.started) { parent = new_taskdata->td_parent; - if (!parent->ompt_task_info.frame.enter_frame.ptr) { - parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } if (ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; ompt_callbacks.ompt_callback(ompt_callback_task_create)( @@ -1735,7 +1733,7 @@ __ompt_task_finish(new_task, new_taskdata->ompt_task_info.scheduling_parent, ompt_task_switch); - new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; + new_taskdata->ompt_task_info.frame.enter_frame = ompt_data_none; } } #endif @@ -1745,11 +1743,6 @@ KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { - parent->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif return res; } kmp_int32 __kmp_aux_omp_task(ident_t *loc_ref, kmp_int32 gtid, @@ -1758,8 +1751,11 @@ } kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task) { -#if OMPT_SUPPORT && OMPT_OPTIONAL +#if OMPT_SUPPORT + OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(gtid); +#if OMPT_OPTIONAL OMPT_STORE_RETURN_ADDRESS(gtid); +#endif #endif return __kmp_omp_task_impl(loc_ref, gtid, new_task); } @@ -1792,8 +1788,6 @@ kmp_taskdata_t *parent = NULL; if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { parent = new_taskdata->td_parent; - if (!parent->ompt_task_info.frame.enter_frame.ptr) - parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); if (ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; ompt_callbacks.ompt_callback(ompt_callback_task_create)( @@ -1811,11 +1805,6 @@ KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { - parent->ompt_task_info.frame.enter_frame = ompt_data_none; - } -#endif return res; } @@ -1840,11 +1829,10 @@ ompt_data_t *my_parallel_data; if (ompt) { + OMPT_STORE_RUNTIME_ENTER_ADDRESS(taskdata, frame_address); my_task_data = &(taskdata->ompt_task_info.task_data); my_parallel_data = OMPT_CUR_TEAM_DATA(thread); - taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address; - if (ompt_enabled.ompt_callback_sync_region) { ompt_callbacks.ompt_callback(ompt_callback_sync_region)( ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, @@ -1911,7 +1899,6 @@ ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, my_task_data, return_address); } - taskdata->ompt_task_info.frame.enter_frame = ompt_data_none; } #endif // OMPT_SUPPORT && OMPT_OPTIONAL diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -68,6 +68,77 @@ #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) +#define OMPT_STORE_RUNTIME_ENTER_ADDRESS_AND_FLAGS(id, addr, flags) \ + OmptEnterAddressGuard EnterAddressGuard { id, addr, flags } +#define OMPT_STORE_RUNTIME_ENTER_ADDRESS(id, addr) \ + OMPT_STORE_RUNTIME_ENTER_ADDRESS_AND_FLAGS(id, addr, ompt_frame_runtime) +#define OMPT_STORE_CUR_RUNTIME_ENTER_ADDRESS(id) \ + OMPT_STORE_RUNTIME_ENTER_ADDRESS_AND_FLAGS(id, OMPT_GET_FRAME_ADDRESS(0), \ + ompt_frame_runtime) + +#define OMPT_BEGIN_TASK_RUNTIME_ENTER_ADDRESS(task, addr) \ + do { \ + KMP_DEBUG_ASSERT(task->ompt_task_info.frame.enter_frame.ptr == NULL); \ + task->ompt_task_info.frame.enter_frame.ptr = addr; \ + task->ompt_task_info.frame.enter_frame_flags = \ + ompt_frame_runtime | ompt_frame_framepointer; \ + } while (0) +#define OMPT_BEGIN_CUR_RUNTIME_ENTER_ADDRESS(gtid) \ + if ((ompt_enabled.enabled || !ompt_enabled.initialized) && gtid >= 0 && \ + __kmp_threads[gtid]) \ + OMPT_BEGIN_TASK_RUNTIME_ENTER_ADDRESS( \ + __kmp_threads[gtid]->th.th_current_task, OMPT_GET_FRAME_ADDRESS(0)) + +#define OMPT_END_TASK_RUNTIME_ENTER_ADDRESS(task) \ + task->ompt_task_info.frame.enter_frame.ptr = NULL; +#define OMPT_END_RUNTIME_ENTER_ADDRESS(gtid) \ + if ((ompt_enabled.enabled || !ompt_enabled.initialized) && gtid >= 0 && \ + __kmp_threads[gtid]) \ + OMPT_END_TASK_RUNTIME_ENTER_ADDRESS(__kmp_threads[gtid]->th.th_current_task) + +#define OMPT_STORE_RUNTIME_EXIT_ADDRESS_AND_FLAGS(gtid, addr, flags) \ + OmptExitAddressGuard ExitAddressGuard { gtid, addr, flags } +#define OMPT_STORE_RUNTIME_EXIT_ADDRESS(gtid, addr) \ + OMPT_STORE_RUNTIME_EXIT_ADDRESS_AND_FLAGS(gtid, addr, ompt_frame_runtime) +#define OMPT_STORE_CUR_RUNTIME_EXIT_ADDRESS(gtid) \ + OMPT_STORE_RUNTIME_EXIT_ADDRESS_AND_FLAGS(gtid, OMPT_GET_FRAME_ADDRESS(0), \ + ompt_frame_runtime) +#define OMPT_STORE_CUR_RUNTIME_EXIT_ADDRESS_IF(gtid, cond) \ + OmptExitAddressGuard ExitAddressGuard; \ + if (cond) \ + ExitAddressGuard.init(gtid, OMPT_GET_FRAME_ADDRESS(0), ompt_frame_runtime) + +#define OMPT_BEGIN_TASK_RUNTIME_EXIT_ADDRESS(task, addr) \ + do { \ + KMP_DEBUG_ASSERT(task->ompt_task_info.frame.enter_frame.ptr == NULL); \ + KMP_DEBUG_ASSERT(task->ompt_task_info.frame.exit_frame.ptr == NULL); \ + task->ompt_task_info.frame.exit_frame.ptr = addr; \ + task->ompt_task_info.frame.exit_frame_flags = \ + ompt_frame_runtime | ompt_frame_framepointer; \ + } while (0) +#define OMPT_BEGIN_CUR_RUNTIME_EXIT_ADDRESS(gtid) \ + if ((ompt_enabled.enabled || !ompt_enabled.initialized) && gtid >= 0 && \ + __kmp_threads[gtid]) \ + OMPT_BEGIN_TASK_RUNTIME_EXIT_ADDRESS( \ + __kmp_threads[gtid]->th.th_current_task, OMPT_GET_FRAME_ADDRESS(0)) + +#define OMPT_END_TASK_RUNTIME_EXIT_ADDRESS(task) \ + KMP_DEBUG_ASSERT(task->ompt_task_info.frame.enter_frame.ptr == NULL); \ + task->ompt_task_info.frame.exit_frame.ptr = NULL +#define OMPT_END_RUNTIME_EXIT_ADDRESS(gtid) \ + if ((ompt_enabled.enabled || !ompt_enabled.initialized) && gtid >= 0 && \ + __kmp_threads[gtid]) \ + OMPT_END_TASK_RUNTIME_EXIT_ADDRESS(__kmp_threads[gtid]->th.th_current_task) + +#define OMPT_STORE_RETURN_ADDRESS(gtid) \ + OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)}; +#define OMPT_STORE_GIVEN_RETURN_ADDRESS(gtid, addr) \ + OmptReturnAddressGuard ReturnAddressGuard{gtid, addr}; +#define OMPT_RESTORE_RETURN_ADDRESS_IF(gtid, ra, cond) +#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) +#define OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid) \ + __ompt_load_return_address(gtid) + template static inline void *__ompt_load_return_address(int gtid) { if (!ompt_enabled.enabled || gtid < 0) @@ -82,19 +153,6 @@ return thr->th.ompt_thread_info.return_address->addr; } -/*#define OMPT_STORE_RETURN_ADDRESS(gtid) \ - if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ - !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ - __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ - __builtin_return_address(0)*/ -#define OMPT_STORE_RETURN_ADDRESS(gtid) \ - OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)}; -#define OMPT_STORE_GIVEN_RETURN_ADDRESS(gtid, addr) \ - OmptReturnAddressGuard ReturnAddressGuard{gtid, addr}; -#define OMPT_RESTORE_RETURN_ADDRESS_IF(gtid, ra, cond) -#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) -#define OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid) \ - __ompt_load_return_address(gtid) //****************************************************************************** // inline functions @@ -147,6 +205,132 @@ } }; +class OmptFrameAddressGuard { +protected: + ompt_data_t FrameAddress{.ptr = nullptr}; + int Flag{0}; + ompt_data_t *TaskFrameAddress{nullptr}; +#ifdef KMP_DEBUG + ompt_data_t *TaskEnterAddress{nullptr}; +#endif + int *TaskFlag{nullptr}; + bool SetAddress{false}; + virtual void lookup(ompt_task_info_t &TaskInfo){}; + +public: + virtual void **getDummyPointer(int Flag) { return &FrameAddress.ptr; } + virtual void **getFramePointer(ompt_task_info_t &TaskInfo, int Flag) { + if ((ompt_enabled.enabled || !ompt_enabled.initialized)) { + SetAddress = true; + lookup(TaskInfo); + KMP_DEBUG_ASSERT(TaskFrameAddress->ptr == NULL); + *TaskFlag = this->Flag = Flag | ompt_frame_framepointer; + } else { + TaskFrameAddress = &FrameAddress; + } + return &TaskFrameAddress->ptr; + } + virtual void **getFramePointer(kmp_taskdata *TaskData, int Flag) { + return getFramePointer(TaskData->ompt_task_info, Flag); + } + virtual void **getFramePointer(kmp_info_t *Thr, int Flag) { + return getFramePointer(Thr->th.th_current_task->ompt_task_info, Flag); + } + virtual void **getFramePointer(int Gtid, int Flag) { + if (Gtid >= 0 && __kmp_threads[Gtid]) + return getFramePointer( + __kmp_threads[Gtid]->th.th_current_task->ompt_task_info, Flag); + else + return &FrameAddress.ptr; + } + virtual void init(ompt_task_info_t &TaskInfo, void *FrameAddress, int Flag) { + if ((ompt_enabled.enabled || !ompt_enabled.initialized) && FrameAddress) { + SetAddress = true; + lookup(TaskInfo); + KMP_DEBUG_ASSERT(TaskFrameAddress->ptr == NULL); + KMP_DEBUG_ASSERT(TaskEnterAddress->ptr == NULL); + TaskFrameAddress->ptr = this->FrameAddress.ptr = FrameAddress; + *TaskFlag = this->Flag = Flag | ompt_frame_framepointer; + } + } + virtual void init(kmp_taskdata *TaskData, void *FrameAddress, int Flag) { + init(TaskData->ompt_task_info, FrameAddress, Flag); + } + virtual void init(kmp_info_t *Thr, void *FrameAddress, int Flag) { + init(Thr->th.th_current_task->ompt_task_info, FrameAddress, Flag); + } + virtual void init(int Gtid, void *FrameAddress, int Flag) { + if (Gtid >= 0 && __kmp_threads[Gtid]) + init(__kmp_threads[Gtid]->th.th_current_task->ompt_task_info, + FrameAddress, Flag); + } + OmptFrameAddressGuard() {} + ~OmptFrameAddressGuard() { + if (SetAddress) + *TaskFrameAddress = ompt_data_none; + } +}; + +class OmptExitAddressGuard : public OmptFrameAddressGuard { +protected: + void lookup(ompt_task_info_t &TaskInfo) { + TaskFrameAddress = &TaskInfo.frame.exit_frame; +#ifdef KMP_DEBUG + TaskEnterAddress = &TaskInfo.frame.enter_frame; +#endif + TaskFlag = &TaskInfo.frame.exit_frame_flags; + } + +public: + OmptExitAddressGuard(kmp_taskdata *TaskData, void *FrameAddress, int Flag) + : OmptFrameAddressGuard() { + this->init(TaskData, FrameAddress, Flag); + } + OmptExitAddressGuard(kmp_info_t *Thr, void *FrameAddress, int Flag) + : OmptFrameAddressGuard() { + this->init(Thr, FrameAddress, Flag); + } + OmptExitAddressGuard(int Gtid, void *FrameAddress, int Flag) + : OmptFrameAddressGuard() { + this->init(Gtid, FrameAddress, Flag); + } + OmptExitAddressGuard() : OmptFrameAddressGuard() {} + ~OmptExitAddressGuard() { + if (SetAddress) + *TaskFrameAddress = ompt_data_none; + } +}; + +class OmptEnterAddressGuard : public OmptFrameAddressGuard { +protected: + void lookup(ompt_task_info_t &TaskInfo) { + TaskFrameAddress = &TaskInfo.frame.enter_frame; +#ifdef KMP_DEBUG + TaskEnterAddress = &TaskInfo.frame.enter_frame; +#endif + TaskFlag = &TaskInfo.frame.enter_frame_flags; + } + +public: + OmptEnterAddressGuard(kmp_taskdata *TaskData, void *FrameAddress, int Flag) + : OmptFrameAddressGuard() { + this->init(TaskData, FrameAddress, Flag); + } + OmptEnterAddressGuard(kmp_info_t *Thr, void *FrameAddress, int Flag) + : OmptFrameAddressGuard() { + this->init(Thr, FrameAddress, Flag); + } + OmptEnterAddressGuard(int Gtid, void *FrameAddress, int Flag) + : OmptFrameAddressGuard() { + this->init(Gtid, FrameAddress, Flag); + } + OmptEnterAddressGuard() : OmptFrameAddressGuard() {} + ~OmptEnterAddressGuard() { + if (SetAddress) + *TaskFrameAddress = ompt_data_none; + } +}; + #endif // OMPT_SUPPORT // macros providing the OMPT callbacks for reduction clause