Index: openmp/runtime/src/kmp_barrier.cpp =================================================================== --- openmp/runtime/src/kmp_barrier.cpp +++ openmp/runtime/src/kmp_barrier.cpp @@ -84,6 +84,7 @@ kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; int nproc = this_thr->th.th_team_nproc; int i; + OMPT_REDUCTION_DECL_IF(this_thr, gtid, reduce); // Don't have to worry about sleep bit here or atomic since team setting kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; @@ -126,7 +127,6 @@ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i)); ANNOTATE_REDUCE_AFTER(reduce); - OMPT_REDUCTION_DECL(this_thr, gtid); OMPT_REDUCTION_BEGIN; (*reduce)(this_thr->th.th_local.reduce_data, other_threads[i]->th.th_local.reduce_data); @@ -322,6 +322,7 @@ // Parent threads wait for all their children to arrive new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; child = 1; + OMPT_REDUCTION_DECL_IF(this_thr, gtid, reduce); do { kmp_info_t *child_thr = other_threads[child_tid]; kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; @@ -354,7 +355,6 @@ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid)); ANNOTATE_REDUCE_AFTER(reduce); - OMPT_REDUCTION_DECL(this_thr, gtid); OMPT_REDUCTION_BEGIN; (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); @@ -539,6 +539,7 @@ /* Perform a hypercube-embedded tree gather to wait until all of the threads have arrived, and reduce any required data as we go. 
*/ kmp_flag_64<> p_flag(&thr_bar->b_arrived); + OMPT_REDUCTION_DECL_IF(this_thr, gtid, reduce); for (level = 0, offset = 1; offset < num_threads; level += branch_bits, offset <<= branch_bits) { kmp_uint32 child; @@ -604,7 +605,6 @@ gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid)); ANNOTATE_REDUCE_AFTER(reduce); - OMPT_REDUCTION_DECL(this_thr, gtid); OMPT_REDUCTION_BEGIN; (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); Index: openmp/runtime/src/kmp_cancel.cpp =================================================================== --- openmp/runtime/src/kmp_cancel.cpp +++ openmp/runtime/src/kmp_cancel.cpp @@ -68,7 +68,7 @@ type = ompt_cancel_sections; ompt_callbacks.ompt_callback(ompt_callback_cancel)( task_data, type | ompt_cancel_activated, - OMPT_GET_RETURN_ADDRESS(0)); + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif // OMPT_SUPPORT && OMPT_OPTIONAL return 1 /* true */; @@ -99,7 +99,7 @@ NULL); ompt_callbacks.ompt_callback(ompt_callback_cancel)( task_data, ompt_cancel_taskgroup | ompt_cancel_activated, - OMPT_GET_RETURN_ADDRESS(0)); + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif return 1 /* true */; @@ -123,6 +123,9 @@ } kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif return __kmp_aux_cancel(loc_ref, gtid, cncl_kind); } @@ -179,7 +182,7 @@ type = ompt_cancel_sections; ompt_callbacks.ompt_callback(ompt_callback_cancel)( task_data, type | ompt_cancel_detected, - OMPT_GET_RETURN_ADDRESS(0)); + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif return 1 /* true */; @@ -213,7 +216,7 @@ NULL); ompt_callbacks.ompt_callback(ompt_callback_cancel)( task_data, ompt_cancel_taskgroup | ompt_cancel_detected, - OMPT_GET_RETURN_ADDRESS(0)); + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif return !!taskgroup->cancel_request; @@ -235,8 +238,12 @@ kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 
cncl_kind) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif return __kmp_aux_cancellationpoint(loc_ref, gtid, cncl_kind); } + /*! @ingroup CANCELLATION @param loc_ref location of the original task directive Index: openmp/runtime/src/kmp_csupport.cpp =================================================================== --- openmp/runtime/src/kmp_csupport.cpp +++ openmp/runtime/src/kmp_csupport.cpp @@ -622,6 +622,9 @@ #endif } void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif __kmp_aux_end_serialized_parallel(loc, global_tid); } @@ -709,15 +712,6 @@ __kmp_check_barrier(global_tid, ct_barrier, loc); } -#if OMPT_SUPPORT - ompt_frame_t *ompt_frame; - if (ompt_enabled.enabled) { - __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame.ptr == NULL) - ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - } - OMPT_STORE_RETURN_ADDRESS(global_tid); -#endif __kmp_threads[global_tid]->th.th_ident = loc; // TODO: explicit barrier_wait_id: // this function is called when 'barrier' directive is present or @@ -727,16 +721,25 @@ // 4) no sync is required __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif } void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { __kmp_assert_valid_gtid(global_tid); +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); + } + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif __kmp_aux_barrier(loc, global_tid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->enter_frame = ompt_data_none; + } +#endif } /* The BARRIER for a 
MASTER section is always explicit */ @@ -858,7 +861,6 @@ kmp_team_t *team; ompt_wait_id_t lck; void *codeptr_ra; - OMPT_STORE_RETURN_ADDRESS(gtid); if (ompt_enabled.enabled) { team = __kmp_team_from_gtid(gtid); lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; @@ -900,6 +902,9 @@ #endif /* USE_ITT_BUILD */ } void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_aux_ordered(loc, gtid); } @@ -930,7 +935,6 @@ __kmp_parallel_dxo(>id, &cid, loc); #if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( ompt_mutex_ordered, @@ -941,6 +945,9 @@ #endif } void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_aux_end_ordered(loc, gtid); } @@ -1151,9 +1158,6 @@ void __kmp_aux_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { #if KMP_USE_DYNAMIC_LOCK -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(global_tid); -#endif // OMPT_SUPPORT __kmp_aux_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); #else KMP_COUNT_BLOCK(OMP_CRITICAL); @@ -1197,7 +1201,6 @@ __kmp_itt_critical_acquiring(lck); #endif /* USE_ITT_BUILD */ #if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); void *codeptr_ra = NULL; if (ompt_enabled.enabled) { ti = __kmp_threads[global_tid]->th.ompt_thread_info; @@ -1244,6 +1247,9 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif // OMPT_SUPPORT __kmp_aux_critical(loc, global_tid, crit); } @@ -1387,8 +1393,6 @@ ompt_thread_info_t ti; // This is the case, if called from __kmpc_critical: void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); - if (!codeptr) - codeptr = 
OMPT_GET_RETURN_ADDRESS(0); #endif KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); @@ -1501,6 +1505,9 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit, uint32_t hint) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif __kmp_aux_critical_with_hint(loc, global_tid, crit, hint); } @@ -1590,11 +1597,10 @@ #if OMPT_SUPPORT && OMPT_OPTIONAL /* OMPT release event triggers after lock is released; place here to trigger * for all #if branches */ - OMPT_STORE_RETURN_ADDRESS(global_tid); if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, - OMPT_LOAD_RETURN_ADDRESS(0)); + OMPT_LOAD_RETURN_ADDRESS(global_tid)); } #endif @@ -1604,6 +1610,9 @@ void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif __kmp_aux_end_critical(loc, global_tid, crit); } @@ -2111,21 +2120,11 @@ // Consider next barrier a user-visible barrier for barrier region boundaries // Nesting checks are already handled by the single construct checks - { -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif #if USE_ITT_NOTIFY __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 
// tasks can overwrite the location) #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_OPTIONAL - if (ompt_enabled.enabled) { - ompt_frame->enter_frame = ompt_data_none; - } -#endif - } } /* -------------------------------------------------------------------------- */ @@ -3402,6 +3401,9 @@ __kmp_resume_if_soft_paused(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif // check correctness of reduce block nesting #if KMP_USE_DYNAMIC_LOCK if (__kmp_env_consistency_check) @@ -3490,7 +3492,6 @@ if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } - OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3544,6 +3545,9 @@ KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); __kmp_assert_valid_gtid(global_tid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); @@ -3631,6 +3635,9 @@ __kmp_resume_if_soft_paused(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif // check correctness of reduce block nesting #if KMP_USE_DYNAMIC_LOCK if (__kmp_env_consistency_check) @@ -3679,7 +3686,6 @@ if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } - OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = @@ -3739,6 +3745,9 @@ KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); __kmp_assert_valid_gtid(global_tid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif th = __kmp_thread_from_gtid(global_tid); teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); @@ -3761,7 +3770,6 @@ if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 
} - OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3787,7 +3795,6 @@ if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } - OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3808,7 +3815,6 @@ if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } - OMPT_STORE_RETURN_ADDRESS(global_tid); #endif // TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY Index: openmp/runtime/src/kmp_dispatch.cpp =================================================================== --- openmp/runtime/src/kmp_dispatch.cpp +++ openmp/runtime/src/kmp_dispatch.cpp @@ -2418,15 +2418,12 @@ */ int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st #if OMPT_SUPPORT && OMPT_OPTIONAL , - OMPT_LOAD_RETURN_ADDRESS(gtid) + OMPT_GET_RETURN_ADDRESS(0) #endif - ); + ); } /*! @@ -2435,15 +2432,12 @@ int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st #if OMPT_SUPPORT && OMPT_OPTIONAL , - OMPT_LOAD_RETURN_ADDRESS(gtid) + OMPT_GET_RETURN_ADDRESS(0) #endif - ); + ); } /*! 
@@ -2451,15 +2445,12 @@ */ int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st #if OMPT_SUPPORT && OMPT_OPTIONAL , - OMPT_LOAD_RETURN_ADDRESS(gtid) + OMPT_GET_RETURN_ADDRESS(0) #endif - ); + ); } /*! @@ -2468,15 +2459,12 @@ int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st #if OMPT_SUPPORT && OMPT_OPTIONAL , - OMPT_LOAD_RETURN_ADDRESS(gtid) + OMPT_GET_RETURN_ADDRESS(0) #endif - ); + ); } /*! Index: openmp/runtime/src/kmp_gsupport.cpp =================================================================== --- openmp/runtime/src/kmp_gsupport.cpp +++ openmp/runtime/src/kmp_gsupport.cpp @@ -258,12 +258,7 @@ // Retrieve the value of the copyprivate data point, and wait for all // threads to do likewise, then return. 
retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; - { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - } + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { ompt_frame->enter_frame = ompt_data_none; @@ -290,12 +285,7 @@ OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - } + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { ompt_frame->enter_frame = ompt_data_none; @@ -394,6 +384,9 @@ long chunk_size) { // Initialize the loop worksharing construct. +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(*gtid); +#endif KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, schedule != kmp_sch_static); @@ -573,17 +566,12 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - { \ IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ - } \ - { \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ - } \ if (status) { \ KMP_DEBUG_ASSERT(stride == str); \ *p_ub += (str > 0) ? 1 : -1; \ @@ -613,17 +601,12 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - { \ IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ TRUE); \ - } \ - { \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ - } \ if (status) { \ KMP_DEBUG_ASSERT(stride == str); \ *p_ub += (str > 0) ? 1 : -1; \ @@ -738,6 +721,7 @@ gtid, ncounts, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ @@ -785,6 +769,7 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ @@ -829,8 +814,8 @@ if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL @@ -1130,11 +1115,11 @@ \ ompt_pre(); \ \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, \ (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ 9, task, data, num_threads, &loc, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \ \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ @@ -1152,7 +1137,6 @@ if (ompt_enabled.enabled) { \ __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \ - OMPT_STORE_RETURN_ADDRESS(gtid); \ } #define OMPT_LOOP_POST() \ @@ -1234,10 +1218,12 @@ current_task = __kmp_threads[gtid]->th.th_current_task; current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } - OMPT_STORE_RETURN_ADDRESS(gtid); #endif if (if_cond) { +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) { KMP_ASSERT(depend); kmp_gomp_depends_info_t gomp_depends(depend); @@ -1261,9 +1247,9 @@ oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; thread->th.ompt_thread_info.state = ompt_state_work_parallel; - taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); + // taskdata->ompt_task_info.frame.exit_frame.ptr = + // OMPT_GET_FRAME_ADDRESS(0); } - OMPT_STORE_RETURN_ADDRESS(gtid); #endif if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) { KMP_ASSERT(depend); @@ -1272,6 +1258,9 @@ kmp_depend_info_t dep_list[ndeps]; for (kmp_int32 i = 0; i < ndeps; i++) dep_list[i] = gomp_depends.get_kmp_depend(i); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_aux_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL); } @@ -1313,10 +1302,6 @@ MKLOC(loc, "GOMP_taskwait"); int gtid = __kmp_entry_gtid(); -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); #if OMPT_SUPPORT && OMPT_OPTIONAL @@ -1478,12 +1463,7 @@ } #endif task(data); - { -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); - } + KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); #if OMPT_SUPPORT if (ompt_enabled.enabled) { task_info->frame.exit_frame = ompt_data_none; @@ -1510,13 +1490,7 @@ task, data, num_threads, &loc, 
kmp_nm_dynamic_chunked, (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); - { -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - } task(data); KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); @@ -1540,12 +1514,9 @@ 9, task, data, num_threads, &loc, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ \ - { \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - } \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ task(data); \ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); \ ompt_post(); \ @@ -1627,6 +1598,9 @@ MKLOC(loc, "GOMP_cancellation_point"); KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which)); kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif return __kmp_aux_cancellationpoint(&loc, gtid, cncl_kind); } @@ -1637,6 +1611,9 @@ KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which, (int)do_cancel)); kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif if (do_cancel == FALSE) { return __kmp_aux_cancellationpoint(&loc, gtid, cncl_kind); @@ -1853,6 +1830,10 @@ void (*func)(void *), void *data, void (*copy_func)(void *, void *), long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks, int priority, long start, long end, long step) { +#if OMPT_SUPPORT + int gtid = __kmp_entry_gtid(); + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __GOMP_taskloop(func, data, copy_func, arg_size, arg_align, gomp_flags, num_tasks, priority, start, end, step); } Index: 
openmp/runtime/src/kmp_runtime.cpp =================================================================== --- openmp/runtime/src/kmp_runtime.cpp +++ openmp/runtime/src/kmp_runtime.cpp @@ -1186,10 +1186,10 @@ #if OMPT_SUPPORT ompt_data_t ompt_parallel_data = ompt_data_none; ompt_data_t *implicit_task_data; - void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); + void *codeptr; if (ompt_enabled.enabled && this_thr->th.ompt_thread_info.state != ompt_state_overhead) { - + codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); ompt_task_info_t *parent_task_info; parent_task_info = OMPT_CUR_TASK_INFO(this_thr); @@ -1364,7 +1364,6 @@ if (__kmp_env_consistency_check) __kmp_push_parallel(global_tid, NULL); #if OMPT_SUPPORT - serial_team->t.ompt_team_info.master_return_address = codeptr; if (ompt_enabled.enabled && this_thr->th.ompt_thread_info.state != ompt_state_overhead) { OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); @@ -1415,6 +1414,9 @@ int teams_level; #if KMP_NESTED_HOT_TEAMS kmp_hot_team_ptr_t **p_hot_teams; +#endif +#if OMPT_SUPPORT + void *return_address = NULL; #endif { // KMP_TIME_BLOCK KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); @@ -1452,12 +1454,13 @@ ompt_data_t *parent_task_data; ompt_frame_t *ompt_frame; ompt_data_t *implicit_task_data; - void *return_address = NULL; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, NULL, NULL); - return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); + // for combined #pragma omp teams parallel, the non-root initial threads + // might not have a return address, so we will not assert here. + return_address = OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid); } #endif Index: openmp/runtime/src/kmp_taskdeps.cpp =================================================================== --- openmp/runtime/src/kmp_taskdeps.cpp +++ openmp/runtime/src/kmp_taskdeps.cpp @@ -530,7 +530,7 @@ current_task ? 
&(current_task->ompt_task_info.frame) : NULL, &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1, - OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid)); + OMPT_LOAD_RETURN_ADDRESS(gtid)); } new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); @@ -642,6 +642,9 @@ kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) { +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif return __kmp_aux_omp_task_with_deps(loc_ref, gtid, new_task, ndeps, dep_list, ndeps_noalias, noalias_dep_list); } @@ -709,7 +712,7 @@ current_task ? &(current_task->ompt_task_info.frame) : NULL, taskwait_task_data, ompt_task_explicit | ompt_task_undeferred | ompt_task_mergeable, 1, - OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid)); + OMPT_LOAD_RETURN_ADDRESS(gtid)); } } @@ -814,6 +817,9 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) { +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif return __kmp_aux_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, noalias_dep_list); } Index: openmp/runtime/src/kmp_tasking.cpp =================================================================== --- openmp/runtime/src/kmp_tasking.cpp +++ openmp/runtime/src/kmp_tasking.cpp @@ -1714,8 +1714,8 @@ #if OMPT_SUPPORT kmp_taskdata_t *parent = NULL; if (UNLIKELY(ompt_enabled.enabled)) { + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); if (!new_taskdata->td_flags.started) { - OMPT_STORE_RETURN_ADDRESS(gtid); parent = new_taskdata->td_parent; if (!parent->ompt_task_info.frame.enter_frame.ptr) { parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); @@ -1727,7 +1727,7 @@ parent ? 
&(parent->ompt_task_info.frame) : NULL, &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, - OMPT_LOAD_RETURN_ADDRESS(gtid)); + codeptr); } } else { // We are scheduling the continuation of an UNTIED task. @@ -1754,6 +1754,9 @@ } kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif return __kmp_aux_omp_task(loc_ref, gtid, new_task); } @@ -1933,7 +1936,6 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (UNLIKELY(ompt_enabled.enabled)) { - OMPT_STORE_RETURN_ADDRESS(gtid); return __kmp_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), OMPT_GET_RETURN_ADDRESS(0)); } @@ -2442,8 +2444,6 @@ #if OMPT_SUPPORT && OMPT_OPTIONAL if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); kmp_team_t *team = thread->th.th_team; ompt_data_t my_task_data = taskdata->ompt_task_info.task_data; // FIXME: I think this is wrong for lwt! @@ -2456,6 +2456,9 @@ #endif } void __kmpc_taskgroup(ident_t *loc, int gtid) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_aux_taskgroup(loc, gtid); } @@ -2479,8 +2482,6 @@ // FIXME: I think this is wrong for lwt! 
my_parallel_data = team->t.ompt_team_info.parallel_data; codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); - if (!codeptr) - codeptr = OMPT_GET_RETURN_ADDRESS(0); } #endif @@ -2603,6 +2604,9 @@ #endif } void __kmpc_end_taskgroup(ident_t *loc, int gtid) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_aux_end_taskgroup(loc, gtid); } @@ -4468,14 +4472,14 @@ void __kmp_aux_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, +#if OMPT_SUPPORT + void *codeptr_ra, +#endif void *task_dup) { kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); KMP_DEBUG_ASSERT(task != NULL); __kmp_assert_valid_gtid(gtid); if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif __kmp_aux_taskgroup(loc, gtid); } @@ -4519,7 +4523,7 @@ if (ompt_enabled.ompt_callback_work) { ompt_callbacks.ompt_callback(ompt_callback_work)( ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); + &(task_info->task_data), tc, codeptr_ra); } #endif @@ -4574,7 +4578,7 @@ __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, #if OMPT_SUPPORT - OMPT_GET_RETURN_ADDRESS(0), + codeptr_ra, #endif task_dup); // !taskdata->td_flags.native => currently force linear spawning of tasks @@ -4586,7 +4590,7 @@ __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, num_tasks_min, #if OMPT_SUPPORT - OMPT_GET_RETURN_ADDRESS(0), + codeptr_ra, #endif task_dup); } else { @@ -4596,7 +4600,7 @@ __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, #if OMPT_SUPPORT - OMPT_GET_RETURN_ADDRESS(0), + codeptr_ra, #endif task_dup); } @@ -4605,21 +4609,36 @@ if (ompt_enabled.ompt_callback_work) { ompt_callbacks.ompt_callback(ompt_callback_work)( ompt_work_taskloop, ompt_scope_end, 
&(team_info->parallel_data), - &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)); + &(task_info->task_data), tc, codeptr_ra); } #endif if (nogroup == 0) { -#if OMPT_SUPPORT && OMPT_OPTIONAL - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif __kmp_aux_end_taskgroup(loc, gtid); } KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); } +void __kmp_aux_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, + kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, + int nogroup, int sched, kmp_uint64 grainsize, + void *task_dup) { + __kmp_aux_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, + grainsize, +#if OMPT_SUPPORT + OMPT_LOAD_RETURN_ADDRESS(gtid), +#endif + task_dup); +} void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_aux_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, - grainsize, task_dup); + grainsize, +#if OMPT_SUPPORT + OMPT_GET_RETURN_ADDRESS(0), +#endif + task_dup); } Index: openmp/runtime/src/ompt-general.cpp =================================================================== --- openmp/runtime/src/ompt-general.cpp +++ openmp/runtime/src/ompt-general.cpp @@ -84,7 +84,7 @@ * global variables ****************************************************************************/ -ompt_callbacks_active_t ompt_enabled; +ompt_callbacks_active_t ompt_enabled{0}; ompt_state_info_t ompt_state_info[] = { #define ompt_state_macro(state, code) {#state, state}, @@ -413,6 +413,7 @@ switch (tool_setting) { case omp_tool_disabled: OMPT_VERBOSE_INIT_PRINT("OMP tool disabled. 
\n"); + ompt_enabled.enabled = 0; break; case omp_tool_unset: @@ -436,6 +437,7 @@ } if (verbose_init && verbose_file != stderr && verbose_file != stdout) fclose(verbose_file); + ompt_enabled.initialized = 1; #if OMPT_DEBUG printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); #endif Index: openmp/runtime/src/ompt-internal.h =================================================================== --- openmp/runtime/src/ompt-internal.h +++ openmp/runtime/src/ompt-internal.h @@ -37,6 +37,7 @@ typedef struct ompt_callbacks_active_s { unsigned int enabled : 1; + unsigned int initialized : 1; #define ompt_event_macro(event, callback, eventid) unsigned int event : 1; FOREACH_OMPT_EVENT(ompt_event_macro) @@ -71,11 +72,17 @@ struct ompt_lw_taskteam_s *parent; } ompt_lw_taskteam_t; +typedef struct ompt_return_address_s { + struct ompt_return_address_s *next{nullptr}; + void *addr{nullptr}; + ompt_return_address_s(void *ra) : addr(ra) {} +} ompt_return_address_t; + typedef struct { ompt_data_t thread_data; ompt_data_t task_data; /* stored here from implicit barrier-begin until implicit-task-end */ - void *return_address; /* stored here on entry of runtime */ + ompt_return_address_t *return_address; /* stored here on entry of runtime */ ompt_state_t state; ompt_wait_id_t wait_id; int ompt_task_yielded; Index: openmp/runtime/src/ompt-specific.h =================================================================== --- openmp/runtime/src/ompt-specific.h +++ openmp/runtime/src/ompt-specific.h @@ -68,26 +68,31 @@ #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) -inline void *__ompt_load_return_address(int gtid) { +template +static inline void *__ompt_load_return_address(int gtid) { + if (!ompt_enabled.enabled || gtid < 0) + return NULL; kmp_info_t *thr = __kmp_threads[gtid]; - void *return_address = thr->th.ompt_thread_info.return_address; - thr->th.ompt_thread_info.return_address = NULL; - return return_address; 
+ if (assertion) { + KMP_DEBUG_ASSERT(thr->th.ompt_thread_info.return_address != NULL && + thr->th.ompt_thread_info.return_address->addr != NULL); + } + // even if we assert above for + // thr->th.ompt_thread_info.return_address == NULL, + // we should still handle the case in production and avoid segfault + if (thr->th.ompt_thread_info.return_address == NULL) { + return NULL; + } + return thr->th.ompt_thread_info.return_address->addr; } -/*#define OMPT_STORE_RETURN_ADDRESS(gtid) \ - if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ - !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ - __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ - __builtin_return_address(0)*/ #define OMPT_STORE_RETURN_ADDRESS(gtid) \ OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)}; -#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) -#define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \ - ((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ - __kmp_threads[gtid]->th.ompt_thread_info.return_address)? 
\ - __ompt_load_return_address(gtid): \ - __builtin_return_address(0)) +#define OMPT_STORE_GIVEN_RETURN_ADDRESS(gtid, addr) \ + OmptReturnAddressGuard ReturnAddressGuard{gtid, addr}; +#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address<true>(gtid) +#define OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid) \ + __ompt_load_return_address<false>(gtid) //****************************************************************************** // inline functions @@ -112,20 +117,31 @@ class OmptReturnAddressGuard { private: + ompt_return_address_t Ra; + ompt_return_address_t **ThreadRa; bool SetAddress{false}; int Gtid; public: - OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Gtid(Gtid) { - if (ompt_enabled.enabled && Gtid >= 0 && __kmp_threads[Gtid] && - !__kmp_threads[Gtid]->th.ompt_thread_info.return_address) { + void init(int Gtid, void *ReturnAddress) { + if ((ompt_enabled.enabled || !ompt_enabled.initialized) && Gtid >= 0 && + __kmp_threads[Gtid]) { + this->Gtid = Gtid; + Ra.addr = ReturnAddress; SetAddress = true; - __kmp_threads[Gtid]->th.ompt_thread_info.return_address = ReturnAddress; + ThreadRa = &__kmp_threads[Gtid]->th.ompt_thread_info.return_address; + Ra.next = *ThreadRa; + *ThreadRa = &Ra; + KMP_DEBUG_ASSERT(Ra.next == NULL || Ra.addr != Ra.next->addr); } } + OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Ra(ReturnAddress) { + this->init(Gtid, ReturnAddress); + } + OmptReturnAddressGuard() : Ra(NULL) {} ~OmptReturnAddressGuard() { if (SetAddress) - __kmp_threads[Gtid]->th.ompt_thread_info.return_address = NULL; + *ThreadRa = Ra.next; } }; @@ -133,10 +149,17 @@ // macros providing the OMPT callbacks for reduction clause #if OMPT_SUPPORT && OMPT_OPTIONAL +#define OMPT_REDUCTION_DECL_IF(this_thr, gtid, cond) \ + ompt_data_t *my_task_data; \ + ompt_data_t *my_parallel_data; \ + void *return_address; \ + if (cond && ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) { \ + my_task_data = OMPT_CUR_TASK_DATA(this_thr); \ + my_parallel_data =
OMPT_CUR_TEAM_DATA(this_thr); \ + return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); \ + } #define OMPT_REDUCTION_DECL(this_thr, gtid) \ - ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr); \ - ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); \ - void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); + OMPT_REDUCTION_DECL_IF(this_thr, gtid, 1) #define OMPT_REDUCTION_BEGIN \ if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) { \ ompt_callbacks.ompt_callback(ompt_callback_reduction)( \ @@ -150,6 +173,7 @@ my_task_data, return_address); \ } #else // OMPT_SUPPORT && OMPT_OPTIONAL +#define OMPT_REDUCTION_DECL_IF(this_thr, gtid, cond) #define OMPT_REDUCTION_DECL(this_thr, gtid) #define OMPT_REDUCTION_BEGIN #define OMPT_REDUCTION_END Index: openmp/runtime/test/ompt/cancel/cancel_parallel.c =================================================================== --- openmp/runtime/test/ompt/cancel/cancel_parallel.c +++ openmp/runtime/test/ompt/cancel/cancel_parallel.c @@ -1,7 +1,5 @@ // RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s // REQUIRES: ompt -// Current GOMP interface implementation does not support cancellation -// XFAIL: gcc #include "callback.h" #include "omp.h" Index: openmp/runtime/test/ompt/cancel/cancel_taskgroup.c =================================================================== --- openmp/runtime/test/ompt/cancel/cancel_taskgroup.c +++ openmp/runtime/test/ompt/cancel/cancel_taskgroup.c @@ -1,8 +1,8 @@ -// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s -// REQUIRES: ompt -// UNSUPPORTED: clang-3, clang-4.0.0 -// Current GOMP interface implementation does not support cancellation; icc 16 has a bug -// XFAIL: gcc, icc-16 +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: clang-3, clang-4.0.0 +// icc 16 has a bug: +// UNSUPPORTED: icc-16 #include "callback.h"
#include @@ -11,8 +10,7 @@ int main() { int condition=0; - #pragma omp parallel num_threads(2) - {} + int nthreads = omp_get_max_threads(); print_frame(0); #pragma omp parallel num_threads(2) @@ -66,11 +64,14 @@ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_masked_begin: + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], - // CHECK-SAME: task_id=[[PARENT_TASK_ID:[0-9]+]], // CHECK-SAME: codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], + // CHECK-SAME: task_id=[[PARENT_TASK_ID:[0-9]+]], + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no