Index: openmp/runtime/src/kmp.h
===================================================================
--- openmp/runtime/src/kmp.h
+++ openmp/runtime/src/kmp.h
@@ -3282,9 +3282,61 @@
 extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
 extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
 extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
-
+extern int __kmp_aux_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
+                                     kmp_int32 *p_last, kmp_int32 *p_lb,
+                                     kmp_int32 *p_ub, kmp_int32 *p_st);
+extern int __kmp_aux_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
+                                      kmp_int32 *p_last, kmp_uint32 *p_lb,
+                                      kmp_uint32 *p_ub, kmp_int32 *p_st);
+extern int __kmp_aux_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
+                                     kmp_int32 *p_last, kmp_int64 *p_lb,
+                                     kmp_int64 *p_ub, kmp_int64 *p_st);
+extern int __kmp_aux_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
+                                      kmp_int32 *p_last, kmp_uint64 *p_lb,
+                                      kmp_uint64 *p_ub, kmp_int64 *p_st);
+
+void __kmp_aux_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 num_dims,
+                             const struct kmp_dim *dims);
+void __kmp_aux_doacross_wait(ident_t *loc, kmp_int32 gtid,
+                             const kmp_int64 *vec);
+void __kmp_aux_doacross_post(ident_t *loc, kmp_int32 gtid,
+                             const kmp_int64 *vec);
+void __kmp_aux_doacross_fini(ident_t *loc, kmp_int32 gtid);
+
+void __kmp_aux_barrier(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_ordered(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_end_ordered(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_critical(ident_t *, kmp_int32 global_tid, kmp_critical_name *);
+void __kmp_aux_end_critical(ident_t *, kmp_int32 global_tid,
+                            kmp_critical_name *);
+
+kmp_int32 __kmp_aux_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+                             kmp_task_t *new_task);
+kmp_int32 __kmp_aux_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *new_task, kmp_int32 ndeps,
+                                       kmp_depend_info_t *dep_list,
+                                       kmp_int32 ndeps_noalias,
+                                       kmp_depend_info_t *noalias_dep_list);
+void __kmp_aux_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                             kmp_depend_info_t *dep_list,
+                             kmp_int32 ndeps_noalias,
+                             kmp_depend_info_t *noalias_dep_list);
+kmp_int32 __kmp_aux_cancel(ident_t *loc_ref, kmp_int32 gtid,
+                           kmp_int32 cncl_kind);
+kmp_int32 __kmp_aux_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_int32 cncl_kind);
+void __kmp_aux_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
+                        kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub,
+                        kmp_int64 st, kmp_int32 nogroup, kmp_int32 sched,
+                        kmp_uint64 grainsize, void *task_dup);
 #endif /* KMP_GOMP_COMPAT */
+void __kmp_aux_taskgroup(ident_t *loc, int gtid);
+void __kmp_aux_end_taskgroup(ident_t *loc, int gtid);
+void __kmp_aux_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_critical_with_hint(ident_t *, kmp_int32 global_tid,
+                                  kmp_critical_name *, uint32_t hint);
+
 extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
@@ -3682,7 +3734,6 @@
 KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                            kmp_task_t *new_task);
 KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
-
 KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
                                           int end_part);
@@ -3918,6 +3969,36 @@
 #ifdef __cplusplus
 }
+
+template <bool ompt>
+kmp_int32 __kmp_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
+                                      void *frame_address,
+                                      void *return_address);
+template <bool ompt>
+void __kmp_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *task,
+                                       void *enter_frame_address,
+                                       void *exit_frame_address,
+                                       void *return_address);
+template <bool ompt>
+void __kmp_omp_task_complete_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                          kmp_task_t *task);
+
+#if OMPT_SUPPORT
+OMPT_NOINLINE
+void __kmp_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                   kmp_task_t *task, void *enter_frame_address,
+                                   void *exit_frame_address,
+                                   void *return_address);
+OMPT_NOINLINE
+void __kmp_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_task_t *task);
+#if OMPT_OPTIONAL
+OMPT_NOINLINE
+kmp_int32 __kmp_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                  void *frame_address, void *return_address);
+#endif // OMPT_OPTIONAL
+#endif // OMPT_SUPPORT
 #endif

 #endif /* KMP_H */
Index: openmp/runtime/src/kmp_barrier.cpp
===================================================================
--- openmp/runtime/src/kmp_barrier.cpp
+++ openmp/runtime/src/kmp_barrier.cpp
@@ -84,6 +84,7 @@
   kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
   int nproc = this_thr->th.th_team_nproc;
   int i;
+  OMPT_REDUCTION_DECL_IF(this_thr, gtid, reduce);

   // Don't have to worry about sleep bit here or atomic since team setting
   kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
@@ -126,7 +127,6 @@
                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                       team->t.t_id, i));
         ANNOTATE_REDUCE_AFTER(reduce);
-        OMPT_REDUCTION_DECL(this_thr, gtid);
         OMPT_REDUCTION_BEGIN;
         (*reduce)(this_thr->th.th_local.reduce_data,
                   other_threads[i]->th.th_local.reduce_data);
@@ -324,6 +324,7 @@
     // Parent threads wait for all their children to arrive
     new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
     child = 1;
+    OMPT_REDUCTION_DECL_IF(this_thr, gtid, reduce);
     do {
       kmp_info_t *child_thr = other_threads[child_tid];
       kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
@@ -356,7 +357,6 @@
                       gtid, team->t.t_id, tid,
                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                       child_tid));
         ANNOTATE_REDUCE_AFTER(reduce);
-        OMPT_REDUCTION_DECL(this_thr, gtid);
         OMPT_REDUCTION_BEGIN;
         (*reduce)(this_thr->th.th_local.reduce_data,
                   child_thr->th.th_local.reduce_data);
@@ -541,6 +541,7 @@
   /* Perform a hypercube-embedded tree gather to wait until all of the threads
      have arrived, and reduce any required data as we go. */
   kmp_flag_64 p_flag(&thr_bar->b_arrived);
+  OMPT_REDUCTION_DECL_IF(this_thr, gtid, reduce);
   for (level = 0, offset = 1; offset < num_threads;
        level += branch_bits, offset <<= branch_bits) {
     kmp_uint32 child;
@@ -606,7 +607,6 @@
                       gtid, team->t.t_id, tid,
                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                       child_tid));
         ANNOTATE_REDUCE_AFTER(reduce);
-        OMPT_REDUCTION_DECL(this_thr, gtid);
         OMPT_REDUCTION_BEGIN;
         (*reduce)(this_thr->th.th_local.reduce_data,
                   child_thr->th.th_local.reduce_data);
@@ -1384,6 +1384,9 @@
       // KMP_DEBUG_ASSERT( is_split == TRUE );  // #C69956
       this_thr->th.th_local.reduce_data = reduce_data;
     }
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    OMPT_RESTORE_RETURN_ADDRESS_IF(gtid, return_address, reduce != NULL);
+#endif

     if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
       // use 0 to only setup the current team if nthreads > 1
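The barrier hunks above hoist the OMPT reduction bookkeeping out of the per-child reduce loops: `OMPT_REDUCTION_DECL_IF` declares it once per gather, guarded on whether a reduce callback exists. The macro body is not shown in this diff; a hedged sketch, modeled on the pre-existing `OMPT_REDUCTION_DECL` and on helpers that already exist in ompt-specific.h, might look like this:

```cpp
// Sketch only -- the actual OMPT_REDUCTION_DECL_IF may differ. The idea:
// declare the data that OMPT_REDUCTION_BEGIN/END rely on exactly once,
// and consume the stored return address only when a reduction will run.
#define OMPT_REDUCTION_DECL_IF(this_thr, gtid, cond)                           \
  ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);                    \
  ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);                \
  void *return_address = (cond) ? OMPT_LOAD_RETURN_ADDRESS(gtid) : NULL;
```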
Index: openmp/runtime/src/kmp_cancel.cpp
===================================================================
--- openmp/runtime/src/kmp_cancel.cpp
+++ openmp/runtime/src/kmp_cancel.cpp
@@ -26,7 +26,8 @@
 Request cancellation of the binding OpenMP region.
 */
-kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+kmp_int32 __forceinline __kmp_cancel_impl(ident_t *loc_ref, kmp_int32 gtid,
+                                          kmp_int32 cncl_kind) {
   kmp_info_t *this_thr = __kmp_threads[gtid];

   KC_TRACE(10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid,
@@ -67,7 +68,7 @@
           type = ompt_cancel_sections;
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, type | ompt_cancel_activated,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
       return 1 /* true */;
@@ -98,7 +99,7 @@
                                                          NULL);
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, ompt_cancel_taskgroup | ompt_cancel_activated,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif
       return 1 /* true */;
@@ -120,6 +121,16 @@
   KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
   return 0 /* false */;
 }
+kmp_int32 __kmp_aux_cancel(ident_t *loc_ref, kmp_int32 gtid,
+                           kmp_int32 cncl_kind) {
+  return __kmp_cancel_impl(loc_ref, gtid, cncl_kind);
+}
+kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_cancel_impl(loc_ref, gtid, cncl_kind);
+}

 /*!
 @ingroup CANCELLATION
@@ -132,8 +143,9 @@
 Cancellation point for the encountering thread.
 */
-kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
-                                   kmp_int32 cncl_kind) {
+kmp_int32 __forceinline __kmp_cancellationpoint_impl(ident_t *loc_ref,
+                                                     kmp_int32 gtid,
+                                                     kmp_int32 cncl_kind) {
   kmp_info_t *this_thr = __kmp_threads[gtid];

   KC_TRACE(10,
@@ -174,7 +186,7 @@
           type = ompt_cancel_sections;
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, type | ompt_cancel_detected,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif
       return 1 /* true */;
@@ -208,7 +220,7 @@
                                                          NULL);
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, ompt_cancel_taskgroup | ompt_cancel_detected,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif
       return !!taskgroup->cancel_request;
@@ -227,6 +239,17 @@
   KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
   return 0 /* false */;
 }
+kmp_int32 __kmp_aux_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_int32 cncl_kind) {
+  return __kmp_cancellationpoint_impl(loc_ref, gtid, cncl_kind);
+}
+kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
+                                   kmp_int32 cncl_kind) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_cancellationpoint_impl(loc_ref, gtid, cncl_kind);
+}

 /*!
 @ingroup CANCELLATION
Index: openmp/runtime/src/kmp_csupport.cpp
===================================================================
--- openmp/runtime/src/kmp_csupport.cpp
+++ openmp/runtime/src/kmp_csupport.cpp
@@ -480,7 +480,8 @@
 Leave a serialized parallel construct.
 */
-void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
+void __forceinline __kmp_end_serialized_parallel_impl(ident_t *loc,
+                                                      kmp_int32 global_tid) {
   kmp_internal_control_t *top;
   kmp_info_t *this_thr;
   kmp_team_t *serial_team;
@@ -621,6 +622,15 @@
                               : ompt_state_work_parallel);
 #endif
 }
+void __kmp_aux_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
+  __kmp_end_serialized_parallel_impl(loc, global_tid);
+}
+void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_end_serialized_parallel_impl(loc, global_tid);
+}
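The same three-function shape recurs throughout this patch: a `__forceinline` implementation, a `__kmpc_*` compiler entry point that captures the caller's return address for OMPT, and a `__kmp_aux_*` entry point for internal callers (chiefly the GOMP layer) that have already stored one. A minimal sketch with hypothetical names, not part of the patch itself:

```cpp
// One inlined implementation, two entry points differing only in whether
// they capture the application's return address for OMPT.
static kmp_int32 __forceinline __kmp_example_impl(ident_t *loc,
                                                  kmp_int32 gtid) {
  // Reads whatever address an outer entry point stored for this thread.
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  /* ... real work; OMPT callbacks receive codeptr ... */
  return 0;
}
kmp_int32 __kmpc_example(ident_t *loc, kmp_int32 gtid) {
  OMPT_STORE_RETURN_ADDRESS(gtid); // compiler-facing entry: capture RA here
  return __kmp_example_impl(loc, gtid);
}
kmp_int32 __kmp_aux_example(ident_t *loc, kmp_int32 gtid) {
  // Callers (e.g. GOMP shims) stored the RA themselves; no store here.
  return __kmp_example_impl(loc, gtid);
}
```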
 /*!
 @ingroup SYNCHRONIZATION
@@ -690,10 +700,9 @@
 Execute a barrier.
 */
-void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
+void __forceinline __kmp_barrier_impl(ident_t *loc, kmp_int32 global_tid) {
   KMP_COUNT_BLOCK(OMP_BARRIER);
   KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
-  __kmp_assert_valid_gtid(global_tid);

   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();
@@ -707,15 +716,6 @@
     __kmp_check_barrier(global_tid, ct_barrier, loc);
   }

-#if OMPT_SUPPORT
-  ompt_frame_t *ompt_frame;
-  if (ompt_enabled.enabled) {
-    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame.ptr == NULL)
-      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-  }
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
-#endif
   __kmp_threads[global_tid]->th.th_ident = loc;
   // TODO: explicit barrier_wait_id:
   // this function is called when 'barrier' directive is present or
@@ -725,6 +725,24 @@
   // 4) no sync is required

   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
+}
+
+void __kmp_aux_barrier(ident_t *loc, kmp_int32 global_tid) {
+  __kmp_barrier_impl(loc, global_tid);
+}
+
+void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
+  __kmp_assert_valid_gtid(global_tid);
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+  }
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_barrier_impl(loc, global_tid);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
     ompt_frame->enter_frame = ompt_data_none;
@@ -827,7 +845,7 @@
 Start execution of an ordered construct.
 */
-void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
+void __forceinline __kmp_ordered_impl(ident_t *loc, kmp_int32 gtid) {
   int cid = 0;
   kmp_info_t *th;
   KMP_DEBUG_ASSERT(__kmp_init_serial);
@@ -851,7 +869,6 @@
   kmp_team_t *team;
   ompt_wait_id_t lck;
   void *codeptr_ra;
-  OMPT_STORE_RETURN_ADDRESS(gtid);
   if (ompt_enabled.enabled) {
     team = __kmp_team_from_gtid(gtid);
     lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
@@ -892,6 +909,15 @@
   __kmp_itt_ordered_start(gtid);
 #endif /* USE_ITT_BUILD */
 }
+void __kmp_aux_ordered(ident_t *loc, kmp_int32 gtid) {
+  __kmp_ordered_impl(loc, gtid);
+}
+void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_ordered_impl(loc, gtid);
+}

 /*!
 @ingroup WORK_SHARING
 @param loc  source location information
 @param gtid  global thread number.
@@ -900,7 +926,7 @@
 End execution of an ordered construct.
 */
-void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
+void __forceinline __kmp_end_ordered_impl(ident_t *loc, kmp_int32 gtid) {
   int cid = 0;
   kmp_info_t *th;

@@ -920,7 +946,6 @@
   __kmp_parallel_dxo(&gtid, &cid, loc);

 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
         ompt_mutex_ordered,
@@ -930,6 +955,15 @@
   }
 #endif
 }
+void __kmp_aux_end_ordered(ident_t *loc, kmp_int32 gtid) {
+  __kmp_end_ordered_impl(loc, gtid);
+}
+void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_end_ordered_impl(loc, gtid);
+}

 #if KMP_USE_DYNAMIC_LOCK
@@ -1135,13 +1169,10 @@
 Enter code protected by a `critical` construct.
 This function blocks until the executing thread can enter the critical
 section.
 */
-void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
-                     kmp_critical_name *crit) {
+void __forceinline __kmp_critical_impl(ident_t *loc, kmp_int32 global_tid,
+                                       kmp_critical_name *crit) {
 #if KMP_USE_DYNAMIC_LOCK
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
-#endif // OMPT_SUPPORT
-  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
+  __kmp_aux_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
 #else
   KMP_COUNT_BLOCK(OMP_CRITICAL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
@@ -1184,7 +1215,6 @@
   __kmp_itt_critical_acquiring(lck);
 #endif /* USE_ITT_BUILD */
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
   void *codeptr_ra = NULL;
   if (ompt_enabled.enabled) {
     ti = __kmp_threads[global_tid]->th.ompt_thread_info;
@@ -1229,6 +1259,19 @@
 #endif // KMP_USE_DYNAMIC_LOCK
 }

+void __kmp_aux_critical(ident_t *loc, kmp_int32 global_tid,
+                        kmp_critical_name *crit) {
+  __kmp_critical_impl(loc, global_tid, crit);
+}
+
+void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
+                     kmp_critical_name *crit) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif // OMPT_SUPPORT
+  __kmp_critical_impl(loc, global_tid, crit);
+}
+
 #if KMP_USE_DYNAMIC_LOCK

 // Converts the given hint to an internal lock implementation
@@ -1360,8 +1403,10 @@
 thread can enter the critical section unless the hint suggests use of
 speculative execution and the hardware supports it.
 */
-void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
-                               kmp_critical_name *crit, uint32_t hint) {
+void __forceinline __kmp_critical_with_hint_impl(ident_t *loc,
+                                                 kmp_int32 global_tid,
+                                                 kmp_critical_name *crit,
+                                                 uint32_t hint) {
   KMP_COUNT_BLOCK(OMP_CRITICAL);
   kmp_user_lock_p lck;
 #if OMPT_SUPPORT && OMPT_OPTIONAL
@@ -1369,8 +1414,6 @@
   ompt_thread_info_t ti;
   // This is the case, if called from __kmpc_critical:
   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
-  if (!codeptr)
-    codeptr = OMPT_GET_RETURN_ADDRESS(0);
 #endif

   KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
@@ -1481,6 +1524,18 @@
   KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
 } // __kmpc_critical_with_hint

+void __kmp_aux_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
+                                  kmp_critical_name *crit, uint32_t hint) {
+  __kmp_critical_with_hint_impl(loc, global_tid, crit, hint);
+}
+void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
+                               kmp_critical_name *crit, uint32_t hint) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_critical_with_hint_impl(loc, global_tid, crit, hint);
+}
+
 #endif // KMP_USE_DYNAMIC_LOCK

 /*!
@@ -1492,8 +1547,8 @@
 Leave a critical section, releasing any lock that was held during its
 execution.
 */
-void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
-                         kmp_critical_name *crit) {
+void __forceinline __kmp_end_critical_impl(ident_t *loc, kmp_int32 global_tid,
+                                           kmp_critical_name *crit) {
   kmp_user_lock_p lck;

   KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
@@ -1567,11 +1622,10 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   /* OMPT release event triggers after lock is released; place here to trigger
    * for all #if branches */
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
         ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
-        OMPT_LOAD_RETURN_ADDRESS(0));
+        OMPT_LOAD_RETURN_ADDRESS(global_tid));
   }
 #endif

@@ -1579,6 +1633,18 @@
   KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
 }
+void __kmp_aux_end_critical(ident_t *loc, kmp_int32 global_tid,
+                            kmp_critical_name *crit) {
+  __kmp_end_critical_impl(loc, global_tid, crit);
+}
+void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
+                         kmp_critical_name *crit) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_end_critical_impl(loc, global_tid, crit);
+}
+
 /*!
 @ingroup SYNCHRONIZATION
 @param loc  source location information
@@ -2083,21 +2149,11 @@
   // Consider next barrier a user-visible barrier for barrier region boundaries
   // Nesting checks are already handled by the single construct checks
-  {
-#if OMPT_SUPPORT
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
 #if USE_ITT_NOTIFY
-    __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
-    // tasks can overwrite the location)
+  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
+  // tasks can overwrite the location)
 #endif
-    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    if (ompt_enabled.enabled) {
-      ompt_frame->enter_frame = ompt_data_none;
-    }
-#endif
-  }
+  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 }

 /* -------------------------------------------------------------------------- */
@@ -3374,6 +3430,9 @@

   __kmp_resume_if_soft_paused();

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
   // check correctness of reduce block nesting
 #if KMP_USE_DYNAMIC_LOCK
   if (__kmp_env_consistency_check)
@@ -3462,7 +3521,6 @@
       if (ompt_frame->enter_frame.ptr == NULL)
         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     }
-    OMPT_STORE_RETURN_ADDRESS(global_tid);
 #endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident = loc;
@@ -3516,6 +3574,9 @@
   KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
   __kmp_assert_valid_gtid(global_tid);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif

   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
@@ -3603,6 +3664,9 @@

   __kmp_resume_if_soft_paused();

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
   // check correctness of reduce block nesting
 #if KMP_USE_DYNAMIC_LOCK
   if (__kmp_env_consistency_check)
@@ -3651,7 +3715,6 @@
       if (ompt_frame->enter_frame.ptr == NULL)
         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     }
-    OMPT_STORE_RETURN_ADDRESS(global_tid);
 #endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident =
@@ -3711,6 +3774,9 @@
   KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
   __kmp_assert_valid_gtid(global_tid);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif

   th = __kmp_thread_from_gtid(global_tid);
   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
@@ -3733,7 +3799,6 @@
       if (ompt_frame->enter_frame.ptr == NULL)
         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     }
-    OMPT_STORE_RETURN_ADDRESS(global_tid);
 #endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident = loc;
@@ -3759,7 +3824,6 @@
       if (ompt_frame->enter_frame.ptr == NULL)
         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     }
-    OMPT_STORE_RETURN_ADDRESS(global_tid);
 #endif
 #if USE_ITT_NOTIFY
     __kmp_threads[global_tid]->th.th_ident = loc;
@@ -3780,7 +3844,6 @@
       if (ompt_frame->enter_frame.ptr == NULL)
         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     }
-    OMPT_STORE_RETURN_ADDRESS(global_tid);
 #endif
     // TODO: implicit barrier: should be exposed
 #if USE_ITT_NOTIFY
@@ -3864,8 +3927,9 @@
 Expect compiler send us inclusive bounds,
 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
 */
-void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
-                          const struct kmp_dim *dims) {
+void __forceinline __kmp_doacross_init_impl(ident_t *loc, int gtid,
+                                            int num_dims,
+                                            const struct kmp_dim *dims) {
   __kmp_assert_valid_gtid(gtid);
   int j, idx;
   kmp_int64 last, trace_count;
@@ -3984,8 +4048,17 @@
   // touch shared buffer on each iteration
   KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
 }
+void __kmp_aux_doacross_init(ident_t *loc, int gtid, int num_dims,
+                             const struct kmp_dim *dims) {
+  __kmp_doacross_init_impl(loc, gtid, num_dims, dims);
+}
+void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
+                          const struct kmp_dim *dims) {
+  __kmp_doacross_init_impl(loc, gtid, num_dims, dims);
+}

-void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
+void __forceinline __kmp_doacross_wait_impl(ident_t *loc, int gtid,
+                                            const kmp_int64 *vec) {
   __kmp_assert_valid_gtid(gtid);
   kmp_int32 shft, num_dims, i;
   kmp_uint32 flag;
@@ -4096,7 +4169,15 @@
                 gtid, (iter_number << 5) + shft));
 }

-void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
+void __kmp_aux_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_wait_impl(loc, gtid, vec);
+}
+void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_wait_impl(loc, gtid, vec);
+}
+
+void __forceinline __kmp_doacross_post_impl(ident_t *loc, int gtid,
+                                            const kmp_int64 *vec) {
   __kmp_assert_valid_gtid(gtid);
   kmp_int32 shft, num_dims, i;
   kmp_uint32 flag;
@@ -4168,7 +4249,14 @@
                 (iter_number << 5) + shft));
 }

-void __kmpc_doacross_fini(ident_t *loc, int gtid) {
+void __kmp_aux_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_post_impl(loc, gtid, vec);
+}
+void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_post_impl(loc, gtid, vec);
+}
+
+void __forceinline __kmp_doacross_fini_impl(ident_t *loc, int gtid) {
   __kmp_assert_valid_gtid(gtid);
   kmp_int32 num_done;
   kmp_info_t *th = __kmp_threads[gtid];
@@ -4202,6 +4290,12 @@
   pr_buf->th_doacross_info = NULL;
   KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
 }
+void __kmp_aux_doacross_fini(ident_t *loc, int gtid) {
+  __kmp_doacross_fini_impl(loc, gtid);
+}
+void __kmpc_doacross_fini(ident_t *loc, int gtid) {
+  __kmp_doacross_fini_impl(loc, gtid);
+}

 /* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
Index: openmp/runtime/src/kmp_dispatch.cpp
===================================================================
--- openmp/runtime/src/kmp_dispatch.cpp
+++ openmp/runtime/src/kmp_dispatch.cpp
@@ -992,7 +992,8 @@
     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
     ompt_callbacks.ompt_callback(ompt_callback_work)(
         ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
-        &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
+        &(task_info->task_data), pr->u.p.tc,
+        OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid));
   }
 #endif
   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_dynamic);
@@ -2418,15 +2419,12 @@
 */
 int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
-                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                        OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                        );
+  );
 }

 /*!
@@ -2435,15 +2433,12 @@
 int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                             kmp_int32 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                          ,
-                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                         OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                         );
+  );
 }

 /*!
@@ -2451,15 +2446,12 @@
 */
 int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
-                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                        OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                        );
+  );
 }

 /*!
@@ -2468,15 +2460,12 @@
 int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                             kmp_int64 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                          ,
-                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                         OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                         );
+  );
 }
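Two loader flavors are in play in these hunks: the `__kmpc_dispatch_next_*` entry points now take the caller's address directly with `OMPT_GET_RETURN_ADDRESS(0)` (they are the outermost runtime frame), while `OMPT_LOAD_RETURN_ADDRESS_OR_NULL` reads a previously stored address without requiring one. The macro definitions are not visible in this diff; given the templated `__ompt_load_return_address<assertion>` near its end, a presumed mapping is:

```cpp
// Assumption, not shown in the patch: the plain loader asserts an outer
// entry point stored an address; the _OR_NULL flavor tolerates a missing
// one and degrades to NULL where a codeptr is optional.
#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address<true>(gtid)
#define OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid)                                 \
  __ompt_load_return_address<false>(gtid)
```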
 /*!
@@ -2635,6 +2624,47 @@
   __kmp_dispatch_finish_chunk(gtid, loc);
 }

+int __kmp_aux_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                              kmp_int32 *p_lb, kmp_int32 *p_ub,
+                              kmp_int32 *p_st) {
+  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                        ,
+                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+int __kmp_aux_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                               kmp_uint32 *p_lb, kmp_uint32 *p_ub,
+                               kmp_int32 *p_st) {
+  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                         ,
+                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+int __kmp_aux_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                              kmp_int64 *p_lb, kmp_int64 *p_ub,
+                              kmp_int64 *p_st) {
+  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                        ,
+                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+int __kmp_aux_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                               kmp_uint64 *p_lb, kmp_uint64 *p_ub,
+                               kmp_int64 *p_st) {
+  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                         ,
+                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+
 #endif /* KMP_GOMP_COMPAT */

 /* ------------------------------------------------------------------------ */
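The `__kmp_aux_dispatch_next_*` variants exist for the GOMP compatibility layer, where the outermost shim stores the application's return address exactly once and every internal call loads it. A hedged sketch of that calling convention (the shim below is hypothetical, not one of the patch's functions):

```cpp
// The only frame that captures the application's codeptr is the outermost
// GOMP shim; the __kmp_aux_* entry points load it and never re-store.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_EXAMPLE)(void) { // hypothetical shim
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_example");
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid); // push the app's return address once
#endif
  __kmp_aux_barrier(&loc, gtid); // consumes the stored address internally
}
```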
Index: openmp/runtime/src/kmp_gsupport.cpp
===================================================================
--- openmp/runtime/src/kmp_gsupport.cpp
+++ openmp/runtime/src/kmp_gsupport.cpp
@@ -105,7 +105,7 @@
   }
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_barrier(&loc, gtid);
+  __kmp_aux_barrier(&loc, gtid);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
     ompt_frame->enter_frame = ompt_data_none;
@@ -131,7 +131,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
+  __kmp_aux_critical(&loc, gtid, __kmp_unnamed_critical_addr);
 }

 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
@@ -141,21 +141,21 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
+  __kmp_aux_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
 }

 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_critical_name_start");
   KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
-  __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
+  __kmp_aux_critical(&loc, gtid, (kmp_critical_name *)pptr);
 }

 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) {
   int gtid = __kmp_get_gtid();
   MKLOC(loc, "GOMP_critical_name_end");
   KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
-  __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
+  __kmp_aux_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
 }

 // The Gnu codegen tries to use locked operations to perform atomic updates
@@ -258,12 +258,7 @@
   // Retrieve the value of the copyprivate data point, and wait for all
   // threads to do likewise, then return.
   retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
-  {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-  }
+  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
     ompt_frame->enter_frame = ompt_data_none;
@@ -290,12 +285,7 @@
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-  {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
-  }
+  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
     ompt_frame->enter_frame = ompt_data_none;
@@ -310,7 +300,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_ordered(&loc, gtid);
+  __kmp_aux_ordered(&loc, gtid);
 }

 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
@@ -320,7 +310,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_end_ordered(&loc, gtid);
+  __kmp_aux_end_ordered(&loc, gtid);
 }

 // Dispatch macro defs
@@ -331,16 +321,16 @@
 #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
-#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
+#define KMP_DISPATCH_NEXT __kmp_aux_dispatch_next_4
 #else
 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
-#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
+#define KMP_DISPATCH_NEXT __kmp_aux_dispatch_next_8
 #endif /* KMP_ARCH_X86 */

 #define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
 #define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
-#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
+#define KMP_DISPATCH_NEXT_ULL __kmp_aux_dispatch_next_8u

 // The parallel construct
@@ -394,6 +384,9 @@
                                                long chunk_size) {
   // Initialize the loop worksharing construct.
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(*gtid);
+#endif
   KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
                     schedule != kmp_sch_static);
@@ -573,17 +566,12 @@
                   gtid, lb, ub, str, chunk_sz));                              \
                                                                               \
     if ((str > 0) ? (lb < ub) : (lb > ub)) {                                  \
-      {                                                                       \
       IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                       \
       KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                           \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,       \
                         (schedule) != kmp_sch_static);                        \
-      }                                                                       \
-      {                                                                       \
-        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                     \
       status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,           \
                                  (kmp_int *)p_ub, (kmp_int *)&stride);        \
-      }                                                                       \
       if (status) {                                                           \
         KMP_DEBUG_ASSERT(stride == str);                                      \
         *p_ub += (str > 0) ? 1 : -1;                                          \
@@ -613,17 +601,12 @@
                   gtid, lb, ub, str, chunk_sz));                              \
                                                                               \
     if ((str > 0) ? (lb < ub) : (lb > ub)) {                                  \
-      {                                                                       \
       IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                       \
       KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                           \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,       \
                         TRUE);                                                \
-      }                                                                       \
-      {                                                                       \
-        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                     \
       status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,           \
                                  (kmp_int *)p_ub, (kmp_int *)&stride);        \
-      }                                                                       \
       if (status) {                                                           \
         KMP_DEBUG_ASSERT(stride == str);                                      \
         *p_ub += (str > 0) ? 1 : -1;                                          \
@@ -642,7 +625,7 @@

 #define KMP_DOACROSS_FINI(status, gtid)                                       \
   if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) {    \
-    __kmpc_doacross_fini(NULL, gtid);                                         \
+    __kmp_aux_doacross_fini(NULL, gtid);                                      \
   }

 #define LOOP_NEXT(func, fini_code)                                            \
@@ -728,7 +711,7 @@
       dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
     lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -738,6 +721,7 @@
                   gtid, ncounts, lb, ub, str, chunk_sz));                     \
                                                                               \
     if ((str > 0) ? (lb < ub) : (lb > ub)) {                                  \
+      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                       \
       KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                           \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,       \
                         (schedule) != kmp_sch_static);                        \
@@ -775,7 +759,7 @@
       dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
     lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -785,6 +769,7 @@
                   gtid, lb, ub, str, chunk_sz));                              \
                                                                               \
     if ((str > 0) ? (lb < ub) : (lb > ub)) {                                  \
+      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                       \
       KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                           \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
       status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,           \
@@ -829,8 +814,8 @@
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
     ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-    OMPT_STORE_RETURN_ADDRESS(gtid);
   }
+  OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
@@ -1018,7 +1003,7 @@
       dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
     lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -1067,7 +1052,7 @@
       dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
     lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -1130,11 +1115,11 @@
                                                                               \
     ompt_pre();                                                               \
                                                                               \
+    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                         \
     __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,                   \
                          (microtask_t)__kmp_GOMP_parallel_microtask_wrapper,  \
                          9, task, data, num_threads, &loc, (schedule), lb,    \
                          (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);     \
-    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                         \
                                                                               \
     KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                             \
                       (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,         \
@@ -1152,7 +1137,6 @@
   if (ompt_enabled.enabled) {                                                 \
     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);  \
     parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);                \
-    OMPT_STORE_RETURN_ADDRESS(gtid);                                          \
   }

 #define OMPT_LOOP_POST()                                                      \
@@ -1234,10 +1218,12 @@
     current_task = __kmp_threads[gtid]->th.th_current_task;
     current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
-  OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif

   if (if_cond) {
+#if OMPT_SUPPORT
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
     if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
       KMP_ASSERT(depend);
       kmp_gomp_depends_info_t gomp_depends(depend);
@@ -1245,9 +1231,9 @@
       kmp_depend_info_t dep_list[ndeps];
       for (kmp_int32 i = 0; i < ndeps; i++)
         dep_list[i] = gomp_depends.get_kmp_depend(i);
-      __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL);
+      __kmp_aux_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL);
     } else {
-      __kmpc_omp_task(&loc, gtid, task);
+      __kmp_aux_omp_task(&loc, gtid, task);
     }
   } else {
 #if OMPT_SUPPORT
@@ -1261,9 +1247,9 @@
       oldInfo = thread->th.ompt_thread_info;
       thread->th.ompt_thread_info.wait_id = 0;
       thread->th.ompt_thread_info.state = ompt_state_work_parallel;
-      taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+      // taskdata->ompt_task_info.frame.exit_frame.ptr =
+      //     OMPT_GET_FRAME_ADDRESS(0);
     }
-    OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
     if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
       KMP_ASSERT(depend);
@@ -1272,12 +1258,29 @@
       kmp_depend_info_t dep_list[ndeps];
       for (kmp_int32 i = 0; i < ndeps; i++)
         dep_list[i] = gomp_depends.get_kmp_depend(i);
-      __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
+#if OMPT_SUPPORT
+      OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+      __kmp_aux_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
     }

-    __kmpc_omp_task_begin_if0(&loc, gtid, task);
+#if OMPT_SUPPORT
+    if (UNLIKELY(ompt_enabled.enabled)) {
+      __kmp_omp_task_begin_if0_ompt(&loc, gtid, task, OMPT_GET_FRAME_ADDRESS(1),
+                                    OMPT_GET_FRAME_ADDRESS(0),
+                                    OMPT_GET_RETURN_ADDRESS(0));
+    } else
+#endif
+      __kmp_omp_task_begin_if0_template<false>(&loc, gtid, task, NULL, NULL,
+                                               NULL);
     func(data);
-    __kmpc_omp_task_complete_if0(&loc, gtid, task);
+#if OMPT_SUPPORT
+    if (UNLIKELY(ompt_enabled.enabled)) {
+      __kmp_omp_task_complete_if0_ompt(&loc, gtid, task);
+      return;
+    }
+#endif
+    __kmp_omp_task_complete_if0_template<false>(&loc, gtid, task);

 #if OMPT_SUPPORT
     if (ompt_enabled.enabled) {
@@ -1299,13 +1302,15 @@
   MKLOC(loc, "GOMP_taskwait");
   int gtid = __kmp_entry_gtid();

-#if OMPT_SUPPORT
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-
   KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));

-  __kmpc_omp_taskwait(&loc, gtid);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    __kmp_omp_taskwait_ompt(&loc, gtid, OMPT_GET_FRAME_ADDRESS(0),
+                            OMPT_GET_RETURN_ADDRESS(0));
+  } else
+#endif
+    __kmp_omp_taskwait_template<false>(&loc, gtid, NULL, NULL);

   KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
 }
@@ -1458,12 +1463,7 @@
   }
 #endif
   task(data);
-  {
-#if OMPT_SUPPORT
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
-  }
+  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     task_info->frame.exit_frame = ompt_data_none;
@@ -1490,13 +1490,7 @@
                        task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
                        (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);

-  {
-#if OMPT_SUPPORT
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-    KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1,
-                      TRUE);
-  }
   task(data);
   KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
   KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
@@ -1520,12 +1514,9 @@
                          9, task, data, num_threads, &loc, (schedule), lb,    \
                          (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);     \
                                                                               \
-    {                                                                         \
-      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                       \
-      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                           \
-                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,       \
-                        (schedule) != kmp_sch_static);                        \
-    }                                                                         \
+    KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                             \
+                      (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,         \
+                      (schedule) != kmp_sch_static);                          \
     task(data);                                                               \
     KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();                        \
     ompt_post();                                                              \
@@ -1563,7 +1554,7 @@
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif

-  __kmpc_taskgroup(&loc, gtid);
+  __kmp_aux_taskgroup(&loc, gtid);

   return;
 }
@@ -1577,7 +1568,7 @@
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif

-  __kmpc_end_taskgroup(&loc, gtid);
+  __kmp_aux_end_taskgroup(&loc, gtid);

   return;
 }
@@ -1607,7 +1598,10 @@
   MKLOC(loc, "GOMP_cancellation_point");
   KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which));
   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
-  return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_aux_cancellationpoint(&loc, gtid, cncl_kind);
 }

 // Return true if cancellation should take place, false otherwise
@@ -1617,11 +1611,14 @@
   KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which,
                 (int)do_cancel));
   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif

   if (do_cancel == FALSE) {
-    return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
+    return __kmp_aux_cancellationpoint(&loc, gtid, cncl_kind);
   } else {
-    return __kmpc_cancel(&loc, gtid, cncl_kind);
+    return __kmp_aux_cancel(&loc, gtid, cncl_kind);
   }
 }
@@ -1774,9 +1771,9 @@
   loop_bounds = (T *)task->shareds;
   loop_bounds[0] = start;
   loop_bounds[1] = end + (up ? -1 : 1);
-  __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
-                  (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, nogroup,
-                  sched, (kmp_uint64)num_tasks, (void *)task_dup);
+  __kmp_aux_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
+                     (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, nogroup,
+                     sched, (kmp_uint64)num_tasks, (void *)task_dup);
 }

 // 4 byte version of GOMP_doacross_post
@@ -1795,7 +1792,7 @@
   for (kmp_int64 i = 0; i < num_dims; ++i) {
     vec[i] = (kmp_int64)count[i];
   }
-  __kmpc_doacross_post(&loc, gtid, vec);
+  __kmp_aux_doacross_post(&loc, gtid, vec);
   __kmp_thread_free(th, vec);
 }
@@ -1805,7 +1802,7 @@
 template <> void __kmp_GOMP_doacross_post(long *count) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_doacross_post");
-  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
+  __kmp_aux_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
 }

 template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {
@@ -1820,7 +1817,7 @@
     T item = va_arg(args, T);
     vec[i] = (kmp_int64)item;
   }
-  __kmpc_doacross_wait(&loc, gtid, vec);
+  __kmp_aux_doacross_wait(&loc, gtid, vec);
   __kmp_thread_free(th, vec);
   return;
 }
@@ -1833,6 +1830,10 @@
     void (*func)(void *), void *data, void (*copy_func)(void *, void *),
     long arg_size, long arg_align, unsigned gomp_flags,
     unsigned long num_tasks, int priority, long start, long end, long step) {
+#if OMPT_SUPPORT
+  int gtid = __kmp_entry_gtid();
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
   __GOMP_taskloop(func, data, copy_func, arg_size, arg_align, gomp_flags,
                   num_tasks, priority, start, end, step);
 }
@@ -1862,7 +1863,7 @@
     unsigned long long *count) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_doacross_ull_post");
-  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
+  __kmp_aux_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
 }

 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
@@ -1904,7 +1905,7 @@
 #if OMPT_SUPPORT
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
+  __kmp_aux_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
   KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid));
 }
Index: openmp/runtime/src/kmp_runtime.cpp
===================================================================
--- openmp/runtime/src/kmp_runtime.cpp
+++ openmp/runtime/src/kmp_runtime.cpp
@@ -1186,10 +1186,10 @@
 #if OMPT_SUPPORT
   ompt_data_t ompt_parallel_data = ompt_data_none;
   ompt_data_t *implicit_task_data;
-  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+  void *codeptr;
   if (ompt_enabled.enabled &&
       this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
-
+    codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
     ompt_task_info_t *parent_task_info;
     parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
@@ -1364,7 +1364,6 @@
   if (__kmp_env_consistency_check)
     __kmp_push_parallel(global_tid, NULL);
 #if OMPT_SUPPORT
-  serial_team->t.ompt_team_info.master_return_address = codeptr;
   if (ompt_enabled.enabled &&
       this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
     OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
@@ -1415,6 +1414,9 @@
   int teams_level;
 #if KMP_NESTED_HOT_TEAMS
   kmp_hot_team_ptr_t **p_hot_teams;
+#endif
+#if OMPT_SUPPORT
+  void *return_address = NULL;
 #endif
   { // KMP_TIME_BLOCK
     KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
@@ -1452,12 +1454,11 @@
     ompt_data_t *parent_task_data;
     ompt_frame_t *ompt_frame;
     ompt_data_t *implicit_task_data;
-    void *return_address = NULL;

     if (ompt_enabled.enabled) {
       __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                     NULL, NULL);
-      return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
+      return_address = OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid);
     }
 #endif
@@ -1494,6 +1495,8 @@
       }
       master_th->th.ompt_thread_info.state = ompt_state_overhead;
     }
+    OMPT_RESTORE_RETURN_ADDRESS_IF(
+        gtid, return_address, microtask == (microtask_t)__kmp_teams_master);
 #endif

     master_th->th.th_ident = loc;
@@ -1512,7 +1515,7 @@
       // Increment our nested depth levels, but not increase the serialization
       if (parent_team == master_th->th.th_serial_team) {
         // AC: we are in serialized parallel
-        __kmpc_serialized_parallel(loc, gtid);
+        __kmp_serialized_parallel(loc, gtid);
         KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

         if (call_context == fork_context_gnu) {
@@ -1742,7 +1745,7 @@
       KA_TRACE(20,
                ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

-      __kmpc_serialized_parallel(loc, gtid);
+      __kmp_serialized_parallel(loc, gtid);

       if (call_context == fork_context_intel) {
         /* TODO this sucks, use the compiler itself to pass args! :) */
@@ -2237,6 +2240,10 @@
   }
 #endif

+#if OMPT_SUPPORT
+  OMPT_RESTORE_RETURN_ADDRESS_IF(gtid, return_address,
+                                 team->t.t_invoke == __kmp_invoke_teams_master);
+#endif
   if (!team->t.t_invoke(gtid)) {
     KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
   }
@@ -2349,7 +2356,7 @@
       team->t.t_serialized++;
     }
   }
-  __kmpc_end_serialized_parallel(loc, gtid);
+  __kmp_aux_end_serialized_parallel(loc, gtid);

 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
Index: openmp/runtime/src/kmp_taskdeps.cpp
===================================================================
--- openmp/runtime/src/kmp_taskdeps.cpp
+++ openmp/runtime/src/kmp_taskdeps.cpp
@@ -505,11 +505,10 @@
 Schedule a non-thread-switchable task with dependences for execution
 */
-kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
-                                    kmp_task_t *new_task, kmp_int32 ndeps,
-                                    kmp_depend_info_t *dep_list,
-                                    kmp_int32 ndeps_noalias,
-                                    kmp_depend_info_t *noalias_dep_list) {
+kmp_int32 __forceinline __kmp_omp_task_with_deps_impl(
+    ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
+    kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
+    kmp_depend_info_t *noalias_dep_list) {
   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

   KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
@@ -530,7 +529,7 @@
           current_task ? &(current_task->ompt_task_info.frame) : NULL,
           &(new_taskdata->ompt_task_info.task_data),
           ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
-          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
+          OMPT_LOAD_RETURN_ADDRESS(gtid));
     }

     new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
@@ -636,6 +635,25 @@
 #endif
   return ret;
 }
+kmp_int32 __kmp_aux_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *new_task, kmp_int32 ndeps,
+                                       kmp_depend_info_t *dep_list,
+                                       kmp_int32 ndeps_noalias,
+                                       kmp_depend_info_t *noalias_dep_list) {
+  return __kmp_omp_task_with_deps_impl(loc_ref, gtid, new_task, ndeps, dep_list,
+                                       ndeps_noalias, noalias_dep_list);
+}
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                    kmp_task_t *new_task, kmp_int32 ndeps,
+                                    kmp_depend_info_t *dep_list,
+                                    kmp_int32 ndeps_noalias,
+                                    kmp_depend_info_t *noalias_dep_list) {
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_omp_task_with_deps_impl(loc_ref, gtid, new_task, ndeps, dep_list,
+                                       ndeps_noalias, noalias_dep_list);
+}

 #if OMPT_SUPPORT
 void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
@@ -665,9 +683,10 @@
 Blocks the current task until all specifies dependencies have been fulfilled.
 */
-void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
-                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
-                          kmp_depend_info_t *noalias_dep_list) {
+void __kmp_omp_wait_deps_impl(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                              kmp_depend_info_t *dep_list,
+                              kmp_int32 ndeps_noalias,
+                              kmp_depend_info_t *noalias_dep_list) {
   KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));

   if (ndeps == 0 && ndeps_noalias == 0) {
@@ -699,7 +718,7 @@
         current_task ? &(current_task->ompt_task_info.frame) : NULL,
         taskwait_task_data,
         ompt_task_explicit | ompt_task_undeferred | ompt_task_mergeable, 1,
-        OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
+        OMPT_LOAD_RETURN_ADDRESS(gtid));
   }
 }
@@ -799,3 +818,19 @@
   KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
                 gtid, loc_ref));
 }
+void __kmp_aux_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                             kmp_depend_info_t *dep_list,
+                             kmp_int32 ndeps_noalias,
+                             kmp_depend_info_t *noalias_dep_list) {
+  return __kmp_omp_wait_deps_impl(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
+                                  noalias_dep_list);
+}
+void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
+                          kmp_depend_info_t *noalias_dep_list) {
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_omp_wait_deps_impl(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
+                                  noalias_dep_list);
+}
Index: openmp/runtime/src/kmp_tasking.cpp
===================================================================
--- openmp/runtime/src/kmp_tasking.cpp
+++ openmp/runtime/src/kmp_tasking.cpp
@@ -595,10 +595,11 @@
 #endif

 template <bool ompt>
-static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
-                                               kmp_task_t *task,
-                                               void *frame_address,
-                                               void *return_address) {
+void __kmp_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *task,
+                                       void *enter_frame_address,
+                                       void *exit_frame_address,
+                                       void *return_address) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
@@ -622,12 +623,14 @@
 #if OMPT_SUPPORT
   if (ompt) {
-    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
-      current_task->ompt_task_info.frame.enter_frame.ptr =
-          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
-      current_task->ompt_task_info.frame.enter_frame_flags =
-          taskdata->ompt_task_info.frame.exit_frame_flags =
-              ompt_frame_application | ompt_frame_framepointer;
-    }
+    current_task->ompt_task_info.frame.enter_frame.ptr = enter_frame_address;
+    taskdata->ompt_task_info.frame.exit_frame.ptr = exit_frame_address;
+    current_task->ompt_task_info.frame.enter_frame_flags =
+        ompt_frame_application | ompt_frame_framepointer;
+    taskdata->ompt_task_info.frame.exit_frame_flags =
+        ((exit_frame_address == enter_frame_address) ? ompt_frame_application
+                                                     : ompt_frame_runtime) |
+        ompt_frame_framepointer;
     if (ompt_enabled.ompt_callback_task_create) {
       ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
@@ -646,12 +649,13 @@

 #if OMPT_SUPPORT
 OMPT_NOINLINE
-static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
-                                           kmp_task_t *task,
-                                           void *frame_address,
-                                           void *return_address) {
-  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
-                                           return_address);
+void __kmp_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                   kmp_task_t *task, void *enter_frame_address,
+                                   void *exit_frame_address,
+                                   void *return_address) {
+  __kmp_omp_task_begin_if0_template<true>(loc_ref, gtid, task,
+                                          enter_frame_address,
+                                          exit_frame_address, return_address);
 }
 #endif // OMPT_SUPPORT
@@ -666,13 +670,14 @@
 #if OMPT_SUPPORT
   if (UNLIKELY(ompt_enabled.enabled)) {
     OMPT_STORE_RETURN_ADDRESS(gtid);
-    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
-                                   OMPT_GET_FRAME_ADDRESS(1),
-                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
+    __kmp_omp_task_begin_if0_ompt(
+        loc_ref, gtid, task, OMPT_GET_FRAME_ADDRESS(1),
+        OMPT_GET_FRAME_ADDRESS(1), OMPT_LOAD_RETURN_ADDRESS(gtid));
     return;
   }
 #endif
-  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
+  __kmp_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL,
+                                           NULL);
 }

 #ifdef TASK_UNUSED
@@ -968,9 +973,8 @@
 }

 template <bool ompt>
-static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
-                                                  kmp_int32 gtid,
-                                                  kmp_task_t *task) {
+void __kmp_omp_task_complete_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                          kmp_task_t *task) {
   KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
   __kmp_assert_valid_gtid(gtid);
@@ -994,9 +998,9 @@

 #if OMPT_SUPPORT
 OMPT_NOINLINE
-void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
-                                       kmp_task_t *task) {
-  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
+void __kmp_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_task_t *task) {
+  __kmp_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
 }
 #endif // OMPT_SUPPORT
@@ -1009,11 +1013,11 @@
                                   kmp_task_t *task) {
 #if OMPT_SUPPORT
   if (UNLIKELY(ompt_enabled.enabled)) {
-    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
+    __kmp_omp_task_complete_if0_ompt(loc_ref, gtid, task);
     return;
   }
 #endif
-  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
+  __kmp_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
 }

 #ifdef TASK_UNUSED
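The if0/taskwait entry points all follow one dispatch pattern: a `bool` template parameter compiles the OMPT bookkeeping in or out, and a separate `OMPT_NOINLINE` instantiation keeps a stable frame for `OMPT_GET_FRAME_ADDRESS` when a tool is attached. A minimal sketch of the shape (hypothetical names, under the assumption the template argument lists read `<true>`/`<false>` as in the calls above):

```cpp
// Bool template parameter selects OMPT bookkeeping; the _ompt instantiation
// is kept out-of-line so frame-address queries see a predictable frame.
template <bool ompt>
void __kmp_example_if0_template(ident_t *loc, kmp_int32 gtid) { // hypothetical
  if (ompt) { /* wire up enter/exit frames and the return address */ }
  /* common task begin/complete work */
}
OMPT_NOINLINE static void __kmp_example_if0_ompt(ident_t *loc, kmp_int32 gtid) {
  __kmp_example_if0_template<true>(loc, gtid);
}
void __kmpc_example_if0(ident_t *loc, kmp_int32 gtid) {
  if (UNLIKELY(ompt_enabled.enabled))
    return __kmp_example_if0_ompt(loc, gtid);
  __kmp_example_if0_template<false>(loc, gtid);
}
```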
@@ -1695,8 +1699,8 @@
 // be resumed later.
 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
 // resumed later.
-kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
-                          kmp_task_t *new_task) {
+kmp_int32 __forceinline __kmp_omp_task_impl(ident_t *loc_ref, kmp_int32 gtid,
+                                            kmp_task_t *new_task) {
   kmp_int32 res;
   KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
@@ -1710,8 +1714,8 @@
 #if OMPT_SUPPORT
   kmp_taskdata_t *parent = NULL;
   if (UNLIKELY(ompt_enabled.enabled)) {
+    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!new_taskdata->td_flags.started) {
-      OMPT_STORE_RETURN_ADDRESS(gtid);
       parent = new_taskdata->td_parent;
       if (!parent->ompt_task_info.frame.enter_frame.ptr) {
         parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
@@ -1723,7 +1727,7 @@
             parent ? &(parent->ompt_task_info.frame) : NULL,
             &(new_taskdata->ompt_task_info.task_data),
             ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
-            OMPT_LOAD_RETURN_ADDRESS(gtid));
+            codeptr);
       }
     } else {
       // We are scheduling the continuation of an UNTIED task.
@@ -1748,6 +1752,17 @@
 #endif
   return res;
 }
+kmp_int32 __kmp_aux_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+                             kmp_task_t *new_task) {
+  return __kmp_omp_task_impl(loc_ref, gtid, new_task);
+}
+kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+                          kmp_task_t *new_task) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_omp_task_impl(loc_ref, gtid, new_task);
+}

 // __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
 // a taskloop task with the correct OMPT return address
@@ -1805,9 +1820,9 @@
 }

 template <bool ompt>
-static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
-                                              void *frame_address,
-                                              void *return_address) {
+kmp_int32 __kmp_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
+                                      void *frame_address,
+                                      void *return_address) {
   kmp_taskdata_t *taskdata;
   kmp_info_t *thread;
   int thread_finished = FALSE;
@@ -1912,11 +1927,10 @@

 #if OMPT_SUPPORT && OMPT_OPTIONAL
 OMPT_NOINLINE
-static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
-                                          void *frame_address,
-                                          void *return_address) {
-  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
-                                            return_address);
+kmp_int32 __kmp_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                  void *frame_address, void *return_address) {
+  return __kmp_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
+                                           return_address);
 }
 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
@@ -1925,12 +1939,11 @@
 kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (UNLIKELY(ompt_enabled.enabled)) {
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
-                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
+    return __kmp_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
+                                   OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
-  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
+  return __kmp_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
 }

 // __kmpc_omp_taskyield: switch to a different task
@@ -2325,7 +2338,7 @@
   __kmp_assert_valid_gtid(gtid);
   kmp_info_t *thr = __kmp_threads[gtid];
   kmp_int32 nth = thr->th.th_team_nproc;
-  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
+  __kmp_aux_taskgroup(loc, gtid); // form new taskgroup first
   if (nth == 1) {
     KA_TRACE(10,
              ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
@@ -2413,11 +2426,11 @@
 Finalize task reduction for a parallel or worksharing.
 */
 void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
-  __kmpc_end_taskgroup(loc, gtid);
+  __kmp_aux_end_taskgroup(loc, gtid);
 }

 // __kmpc_taskgroup: Start a new taskgroup
-void __kmpc_taskgroup(ident_t *loc, int gtid) {
+void __forceinline __kmp_taskgroup_impl(ident_t *loc, int gtid) {
   __kmp_assert_valid_gtid(gtid);
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = thread->th.th_current_task;
@@ -2434,8 +2447,6 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
-    if (!codeptr)
-      codeptr = OMPT_GET_RETURN_ADDRESS(0);
     kmp_team_t *team = thread->th.th_team;
     ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
     // FIXME: I think this is wrong for lwt!
@@ -2447,10 +2458,19 @@
   }
 #endif
 }
+void __kmp_aux_taskgroup(ident_t *loc, int gtid) {
+  __kmp_taskgroup_impl(loc, gtid);
+}
+void __kmpc_taskgroup(ident_t *loc, int gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_taskgroup_impl(loc, gtid);
+}

 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
 // and its descendants are complete
-void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
+void __kmp_end_taskgroup_impl(ident_t *loc, int gtid) {
   __kmp_assert_valid_gtid(gtid);
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = thread->th.th_current_task;
@@ -2468,8 +2488,6 @@
     // FIXME: I think this is wrong for lwt!
     my_parallel_data = team->t.ompt_team_info.parallel_data;
     codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
-    if (!codeptr)
-      codeptr = OMPT_GET_RETURN_ADDRESS(0);
   }
 #endif
@@ -2591,6 +2609,15 @@
   }
 #endif
 }
+void __kmp_aux_end_taskgroup(ident_t *loc, int gtid) {
+  __kmp_end_taskgroup_impl(loc, gtid);
+}
+void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_end_taskgroup_impl(loc, gtid);
+}

 // __kmp_remove_my_task: remove a task from my own deque
 static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
@@ -4431,17 +4458,18 @@
 Execute the taskloop construct.
@@ -4431,17 +4458,18 @@
 Execute the taskloop construct.
 */
-void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
-                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
-                     int sched, kmp_uint64 grainsize, void *task_dup) {
+void __kmp_taskloop_impl(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                         kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                         int nogroup, int sched, kmp_uint64 grainsize,
+#if OMPT_SUPPORT
+                         void *codeptr_ra,
+#endif
+                         void *task_dup) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
   __kmp_assert_valid_gtid(gtid);
   if (nogroup == 0) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-    __kmpc_taskgroup(loc, gtid);
+    __kmp_aux_taskgroup(loc, gtid);
   }
 
   // =========================================================================
@@ -4484,7 +4512,7 @@
   if (ompt_enabled.ompt_callback_work) {
     ompt_callbacks.ompt_callback(ompt_callback_work)(
         ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
-        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
+        &(task_info->task_data), tc, codeptr_ra);
   }
 #endif
 
@@ -4539,7 +4567,7 @@
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                           grainsize, extras, tc,
 #if OMPT_SUPPORT
-                          OMPT_GET_RETURN_ADDRESS(0),
+                          codeptr_ra,
 #endif
                           task_dup);
     // !taskdata->td_flags.native => currently force linear spawning of tasks
@@ -4551,7 +4579,7 @@
     __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc, num_tasks_min,
 #if OMPT_SUPPORT
-                         OMPT_GET_RETURN_ADDRESS(0),
+                         codeptr_ra,
 #endif
                          task_dup);
   } else {
@@ -4561,7 +4589,7 @@
     __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                           grainsize, extras, tc,
 #if OMPT_SUPPORT
-                          OMPT_GET_RETURN_ADDRESS(0),
+                          codeptr_ra,
 #endif
                           task_dup);
   }
@@ -4570,15 +4598,36 @@
   if (ompt_enabled.ompt_callback_work) {
     ompt_callbacks.ompt_callback(ompt_callback_work)(
         ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
-        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
+        &(task_info->task_data), tc, codeptr_ra);
   }
 #endif
 
   if (nogroup == 0) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-    __kmpc_end_taskgroup(loc, gtid);
+    __kmp_aux_end_taskgroup(loc, gtid);
   }
   KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
 }
+void __kmp_aux_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                        kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                        int nogroup, int sched, kmp_uint64 grainsize,
+                        void *task_dup) {
+  __kmp_taskloop_impl(loc, gtid, task, if_val, lb, ub, st, nogroup, sched,
+                      grainsize,
+#if OMPT_SUPPORT
+                      OMPT_LOAD_RETURN_ADDRESS(gtid),
+#endif
+                      task_dup);
+}
+void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+                     int sched, kmp_uint64 grainsize, void *task_dup) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_taskloop_impl(loc, gtid, task, if_val, lb, ub, st, nogroup, sched,
+                      grainsize,
+#if OMPT_SUPPORT
+                      OMPT_GET_RETURN_ADDRESS(0),
+#endif
+                      task_dup);
+}
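The two taskloop wrappers differ only in where the code pointer comes from: __kmpc_taskloop is called directly from compiled user code, so OMPT_GET_RETURN_ADDRESS(0) yields the user call site, whereas __kmp_aux_taskloop is reached from another runtime entry point and must reuse the address that entry already stored, via OMPT_LOAD_RETURN_ADDRESS. A hypothetical sketch of the distinction, again assuming GCC/Clang for __builtin_return_address; g_stored stands in for the per-thread slot and gomp_taskloop_entry for a GOMP compatibility shim, neither of which is a real symbol here.

#include <cstdio>

static thread_local void *g_stored = nullptr; // per-thread stored address

static void taskloop_impl(void *codeptr_ra) {
  std::printf("taskloop work callback at %p\n", codeptr_ra);
}

static void kmpc_taskloop() { // called directly by compiled user code
  // Take our own return address: the user's taskloop call site.
  taskloop_impl(__builtin_return_address(0));
}

static void aux_taskloop() { // reached from another runtime entry point
  // Reuse whatever address that entry point already stored.
  taskloop_impl(g_stored);
}

static void gomp_taskloop_entry() { // e.g. a GOMP_* compatibility shim
  g_stored = __builtin_return_address(0);
  aux_taskloop();
  g_stored = nullptr;
}

int main() {
  kmpc_taskloop();      // reports main's call site directly
  gomp_taskloop_entry() // reports the site stored by the shim
      ;
  return 0;
}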
Index: openmp/runtime/src/ompt-general.cpp
===================================================================
--- openmp/runtime/src/ompt-general.cpp
+++ openmp/runtime/src/ompt-general.cpp
@@ -84,7 +84,7 @@
  * global variables
  ****************************************************************************/
 
-ompt_callbacks_active_t ompt_enabled;
+ompt_callbacks_active_t ompt_enabled{0};
 
 ompt_state_info_t ompt_state_info[] = {
 #define ompt_state_macro(state, code) {#state, state},
@@ -413,6 +413,7 @@
   switch (tool_setting) {
   case omp_tool_disabled:
     OMPT_VERBOSE_INIT_PRINT("OMP tool disabled. \n");
+    ompt_enabled.enabled = 0;
     break;
 
   case omp_tool_unset:
@@ -436,6 +437,7 @@
   }
   if (verbose_init && verbose_file != stderr && verbose_file != stdout)
     fclose(verbose_file);
+  ompt_enabled.initialized = 1;
 
 #if OMPT_DEBUG
   printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
 #endif
Index: openmp/runtime/src/ompt-internal.h
===================================================================
--- openmp/runtime/src/ompt-internal.h
+++ openmp/runtime/src/ompt-internal.h
@@ -37,6 +37,7 @@
 
 typedef struct ompt_callbacks_active_s {
   unsigned int enabled : 1;
+  unsigned int initialized : 1;
 #define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
 
   FOREACH_OMPT_EVENT(ompt_event_macro)
@@ -71,11 +72,17 @@
   struct ompt_lw_taskteam_s *parent;
 } ompt_lw_taskteam_t;
 
+typedef struct ompt_return_address_s {
+  struct ompt_return_address_s *next{nullptr};
+  void *addr{nullptr};
+  ompt_return_address_s(void *ra) : addr(ra) {}
+} ompt_return_address_t;
+
 typedef struct {
   ompt_data_t thread_data;
   ompt_data_t task_data; /* stored here from implicit barrier-begin until
                             implicit-task-end */
-  void *return_address; /* stored here on entry of runtime */
+  ompt_return_address_t *return_address; /* stored here on entry of runtime */
   ompt_state_t state;
   ompt_wait_id_t wait_id;
   int ompt_task_yielded;
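The new ompt_return_address_s node turns the single per-thread return_address pointer into a stack: each OMPT_STORE_RETURN_ADDRESS pushes a node that lives in the caller's stack frame, and the guard's destructor pops it on scope exit, so nested runtime entries can no longer clobber the outermost address. A self-contained sketch of that push/pop discipline follows; RaGuard and g_top are hypothetical names, the sketch is single-threaded for brevity, and the real guard additionally checks ompt_enabled and the gtid.

#include <cassert>
#include <cstdio>

struct RaNode {
  RaNode *next = nullptr;
  void *addr = nullptr;
};

static thread_local RaNode *g_top = nullptr; // per-thread list head

class RaGuard { // analogous in spirit to OmptReturnAddressGuard
  RaNode node;

public:
  explicit RaGuard(void *ra) {
    node.addr = ra;
    node.next = g_top; // push: the node lives in the caller's frame
    g_top = &node;
  }
  ~RaGuard() {
    assert(g_top == &node); // scopes nest strictly
    g_top = node.next;      // pop: the outer address becomes visible again
  }
};

static void *load_return_address() { return g_top ? g_top->addr : nullptr; }

static void inner_entry() {
  RaGuard guard(__builtin_return_address(0));
  std::printf("inner sees %p\n", load_return_address());
} // destructor pops, restoring the outer address

static void outer_entry() {
  RaGuard guard(__builtin_return_address(0));
  inner_entry();
  std::printf("outer still sees %p\n", load_return_address());
}

int main() { outer_entry(); }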
Index: openmp/runtime/src/ompt-specific.h
===================================================================
--- openmp/runtime/src/ompt-specific.h
+++ openmp/runtime/src/ompt-specific.h
@@ -68,11 +68,18 @@
 #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
 #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
 
-inline void *__ompt_load_return_address(int gtid) {
+template <bool assertion>
+static inline void *__ompt_load_return_address(int gtid) {
+  if (!ompt_enabled.enabled || gtid < 0)
+    return NULL;
   kmp_info_t *thr = __kmp_threads[gtid];
-  void *return_address = thr->th.ompt_thread_info.return_address;
-  thr->th.ompt_thread_info.return_address = NULL;
-  return return_address;
+  if (assertion) {
+    KMP_DEBUG_ASSERT(thr->th.ompt_thread_info.return_address != NULL &&
+                     thr->th.ompt_thread_info.return_address->addr != NULL);
+  } else if (thr->th.ompt_thread_info.return_address == NULL) {
+    return NULL;
+  }
+  return thr->th.ompt_thread_info.return_address->addr;
 }
 
 /*#define OMPT_STORE_RETURN_ADDRESS(gtid) \
@@ -82,12 +89,12 @@
     __builtin_return_address(0)*/
 #define OMPT_STORE_RETURN_ADDRESS(gtid) \
   OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)};
-#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid)
-#define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \
-  ((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \
-    __kmp_threads[gtid]->th.ompt_thread_info.return_address)? \
-   __ompt_load_return_address(gtid): \
-   __builtin_return_address(0))
+#define OMPT_STORE_GIVEN_RETURN_ADDRESS(gtid, addr) \
+  OmptReturnAddressGuard ReturnAddressGuard{gtid, addr};
+#define OMPT_RESTORE_RETURN_ADDRESS_IF(gtid, ra, cond)
+#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address<true>(gtid)
+#define OMPT_LOAD_RETURN_ADDRESS_OR_NULL(gtid) \
+  __ompt_load_return_address<false>(gtid)
 
 //******************************************************************************
 // inline functions
@@ -112,20 +119,31 @@
 
 class OmptReturnAddressGuard {
 private:
+  ompt_return_address_t Ra;
+  ompt_return_address_t **ThreadRa;
   bool SetAddress{false};
   int Gtid;
 
 public:
-  OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Gtid(Gtid) {
-    if (ompt_enabled.enabled && Gtid >= 0 && __kmp_threads[Gtid] &&
-        !__kmp_threads[Gtid]->th.ompt_thread_info.return_address) {
+  void init(int Gtid, void *ReturnAddress) {
+    if ((ompt_enabled.enabled || !ompt_enabled.initialized) && Gtid >= 0 &&
+        __kmp_threads[Gtid]) {
+      this->Gtid = Gtid;
+      Ra.addr = ReturnAddress;
       SetAddress = true;
-      __kmp_threads[Gtid]->th.ompt_thread_info.return_address = ReturnAddress;
+      ThreadRa = &__kmp_threads[Gtid]->th.ompt_thread_info.return_address;
+      Ra.next = *ThreadRa;
+      *ThreadRa = &Ra;
+      KMP_DEBUG_ASSERT(Ra.next == NULL || Ra.addr != Ra.next->addr);
     }
   }
+  OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Ra(ReturnAddress) {
+    this->init(Gtid, ReturnAddress);
+  }
+  OmptReturnAddressGuard() : Ra(NULL) {}
   ~OmptReturnAddressGuard() {
    if (SetAddress)
-      __kmp_threads[Gtid]->th.ompt_thread_info.return_address = NULL;
+      *ThreadRa = Ra.next;
   }
 };
 
@@ -133,10 +151,17 @@
 // macros providing the OMPT callbacks for reduction clause
 #if OMPT_SUPPORT && OMPT_OPTIONAL
+#define OMPT_REDUCTION_DECL_IF(this_thr, gtid, cond) \
+  ompt_data_t *my_task_data; \
+  ompt_data_t *my_parallel_data; \
+  void *return_address; \
+  if (cond && ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) { \
+    my_task_data = OMPT_CUR_TASK_DATA(this_thr); \
+    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); \
+    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); \
+  }
 #define OMPT_REDUCTION_DECL(this_thr, gtid) \
-  ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr); \
-  ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); \
-  void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
+  OMPT_REDUCTION_DECL_IF(this_thr, gtid, 1)
 #define OMPT_REDUCTION_BEGIN \
   if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) { \
     ompt_callbacks.ompt_callback(ompt_callback_reduction)( \
@@ -150,6 +175,7 @@
     my_task_data, return_address); \
   }
 #else // OMPT_SUPPORT && OMPT_OPTIONAL
+#define OMPT_REDUCTION_DECL_IF(this_thr, gtid, cond)
 #define OMPT_REDUCTION_DECL(this_thr, gtid)
 #define OMPT_REDUCTION_BEGIN
 #define OMPT_REDUCTION_END
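OMPT_REDUCTION_DECL_IF exists so the barrier code (see kmp_barrier.cpp above) can hoist the return-address load out of the reduce loop and perform it only when a reduction is actually present; the locals are later read under the very same condition, so no path observes them uninitialized. A compilable sketch of this conditional-declaration pattern follows; tool_enabled and load_return_address are hypothetical stand-ins, the task/parallel data are omitted, and the nullptr initialization is added here for tidiness even though the runtime macro omits it.

#include <cstdio>

static bool tool_enabled = true; // stands in for ompt_enabled

static void *load_return_address() { // stands in for OMPT_LOAD_RETURN_ADDRESS
  return __builtin_return_address(0);
}

// Mirrors OMPT_REDUCTION_DECL_IF: declare unconditionally, fill only when
// the guard condition holds.
#define REDUCTION_DECL_IF(cond)                                                \
  void *return_address = nullptr;                                              \
  if ((cond) && tool_enabled)                                                  \
    return_address = load_return_address();

static void gather(bool have_reduce) {
  REDUCTION_DECL_IF(have_reduce) // loaded once, before any reduce loop
  for (int child = 0; child < 4; ++child) {
    if (have_reduce && tool_enabled) // same guard as the declaration
      std::printf("reduce child %d at %p\n", child, return_address);
  }
}

int main() { gather(true); }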
Index: openmp/runtime/test/ompt/cancel/cancel_parallel.c
===================================================================
--- openmp/runtime/test/ompt/cancel/cancel_parallel.c
+++ openmp/runtime/test/ompt/cancel/cancel_parallel.c
@@ -1,7 +1,5 @@
 // RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
 // REQUIRES: ompt
-// Current GOMP interface implementation does not support cancellation
-// XFAIL: gcc
 #include "callback.h"
 #include "omp.h"
Index: openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
===================================================================
--- openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
+++ openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
@@ -1,8 +1,8 @@
-// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
-// REQUIRES: ompt
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
 // UNSUPPORTED: clang-3, clang-4.0.0
-// Current GOMP interface implementation does not support cancellation; icc 16 has a bug
-// XFAIL: gcc, icc-16
+// icc 16 has a bug:
+// UNSUPPORTED: icc-16
 
 #include "callback.h"
 #include <unistd.h>
@@ -11,8 +11,7 @@
 int main() {
   int condition=0;
-  #pragma omp parallel num_threads(2)
-  {}
+  int nthreads = omp_get_max_threads();
 
   print_frame(0);
   #pragma omp parallel num_threads(2)
@@ -66,11 +65,14 @@
 
   // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
   // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
 
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_masked_begin:
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
   // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
-  // CHECK-SAME: task_id=[[PARENT_TASK_ID:[0-9]+]],
   // CHECK-SAME: codeptr_ra={{0x[0-f]*}}
 
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]],
+  // CHECK-SAME: task_id=[[PARENT_TASK_ID:[0-9]+]],
+
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no