Index: openmp/runtime/src/kmp.h
===================================================================
--- openmp/runtime/src/kmp.h
+++ openmp/runtime/src/kmp.h
@@ -3281,9 +3281,61 @@
 extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
 extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
 extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
-
+extern int __kmp_aux_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
+                                     kmp_int32 *p_last, kmp_int32 *p_lb,
+                                     kmp_int32 *p_ub, kmp_int32 *p_st);
+extern int __kmp_aux_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
+                                      kmp_int32 *p_last, kmp_uint32 *p_lb,
+                                      kmp_uint32 *p_ub, kmp_int32 *p_st);
+extern int __kmp_aux_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
+                                     kmp_int32 *p_last, kmp_int64 *p_lb,
+                                     kmp_int64 *p_ub, kmp_int64 *p_st);
+extern int __kmp_aux_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
+                                      kmp_int32 *p_last, kmp_uint64 *p_lb,
+                                      kmp_uint64 *p_ub, kmp_int64 *p_st);
+
+void __kmp_aux_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 num_dims,
+                             const struct kmp_dim *dims);
+void __kmp_aux_doacross_wait(ident_t *loc, kmp_int32 gtid,
+                             const kmp_int64 *vec);
+void __kmp_aux_doacross_post(ident_t *loc, kmp_int32 gtid,
+                             const kmp_int64 *vec);
+void __kmp_aux_doacross_fini(ident_t *loc, kmp_int32 gtid);
+
+void __kmp_aux_barrier(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_ordered(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_end_ordered(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_critical(ident_t *, kmp_int32 global_tid, kmp_critical_name *);
+void __kmp_aux_end_critical(ident_t *, kmp_int32 global_tid,
+                            kmp_critical_name *);
+
+kmp_int32 __kmp_aux_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+                             kmp_task_t *new_task);
+kmp_int32 __kmp_aux_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *new_task, kmp_int32 ndeps,
+                                       kmp_depend_info_t *dep_list,
+                                       kmp_int32 ndeps_noalias,
+                                       kmp_depend_info_t *noalias_dep_list);
+void __kmp_aux_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                             kmp_depend_info_t *dep_list,
+                             kmp_int32 ndeps_noalias,
+                             kmp_depend_info_t *noalias_dep_list);
+kmp_int32 __kmp_aux_cancel(ident_t *loc_ref, kmp_int32 gtid,
+                           kmp_int32 cncl_kind);
+kmp_int32 __kmp_aux_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_int32 cncl_kind);
+void __kmp_aux_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
+                        kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub,
+                        kmp_int64 st, kmp_int32 nogroup, kmp_int32 sched,
+                        kmp_uint64 grainsize, void *task_dup);
 #endif /* KMP_GOMP_COMPAT */
+void __kmp_aux_taskgroup(ident_t *loc, int gtid);
+void __kmp_aux_end_taskgroup(ident_t *loc, int gtid);
+void __kmp_aux_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
+void __kmp_aux_critical_with_hint(ident_t *, kmp_int32 global_tid,
+                                  kmp_critical_name *, uint32_t hint);
+
 extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
@@ -3681,7 +3733,6 @@
 KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                            kmp_task_t *new_task);
 KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
-
 KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
                                           int end_part);
@@ -3917,6 +3968,33 @@
 #ifdef __cplusplus
 }
+
+template <bool ompt>
+kmp_int32 __kmp_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
+                                      void *frame_address,
+                                      void *return_address);
+template <bool ompt>
+void __kmp_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *task, void *frame_address,
+                                       void *return_address);
+template <bool ompt>
+void __kmp_omp_task_complete_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                          kmp_task_t *task);
+
+#if OMPT_SUPPORT
+OMPT_NOINLINE
+void __kmp_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                   kmp_task_t *task, void *frame_address,
+                                   void *return_address);
+OMPT_NOINLINE
+void __kmp_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_task_t *task);
+#if OMPT_OPTIONAL
+OMPT_NOINLINE
+kmp_int32 __kmp_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                  void *frame_address, void *return_address);
+#endif // OMPT_OPTIONAL
+#endif // OMPT_SUPPORT
 #endif
 
 #endif /* KMP_H */
Index: openmp/runtime/src/kmp_cancel.cpp
===================================================================
--- openmp/runtime/src/kmp_cancel.cpp
+++ openmp/runtime/src/kmp_cancel.cpp
@@ -26,7 +26,8 @@
 Request cancellation of the binding OpenMP region.
 */
-kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+kmp_int32 __forceinline __kmp_cancel_impl(ident_t *loc_ref, kmp_int32 gtid,
+                                          kmp_int32 cncl_kind) {
   kmp_info_t *this_thr = __kmp_threads[gtid];
 
   KC_TRACE(10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid,
@@ -67,7 +68,7 @@
           type = ompt_cancel_sections;
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, type | ompt_cancel_activated,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
       return 1 /* true */;
@@ -98,7 +99,7 @@
             NULL);
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, ompt_cancel_taskgroup | ompt_cancel_activated,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif
       return 1 /* true */;
@@ -120,6 +121,16 @@
   KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
   return 0 /* false */;
 }
+kmp_int32 __kmp_aux_cancel(ident_t *loc_ref, kmp_int32 gtid,
+                           kmp_int32 cncl_kind) {
+  return __kmp_cancel_impl(loc_ref, gtid, cncl_kind);
+}
+kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_cancel_impl(loc_ref, gtid, cncl_kind);
+}
 
 /*!
 @ingroup CANCELLATION
 @param loc_ref location of the cancellation point
 @param gtid Global thread ID of encountering thread
 @param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
 @return returns true if a matching cancellation request has been flagged in
 the RTL and the encountering thread has to cancel..
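Note: the split applied to __kmpc_cancel above is the template for the rest of
this patch. The old body becomes a __forceinline __kmp_*_impl helper; the
compiler-facing __kmpc_* entry records the caller's return address for OMPT
before delegating; and a new __kmp_aux_* entry lets internal callers (notably
the GOMP wrappers in kmp_gsupport.cpp, which have already done
OMPT_STORE_RETURN_ADDRESS) reach the same body without storing twice. A minimal
standalone C++ sketch of that shape, with hypothetical names, a plain
thread_local slot standing in for th.ompt_thread_info.return_address, and
GCC/Clang builtins assumed:

    #include <cstdio>

    static thread_local void *tls_return_address = nullptr; // hypothetical slot

    // Mirrors the store-if-empty behavior of OmptReturnAddressGuard.
    static inline void store_return_address(void *ra) {
      if (!tls_return_address)
        tls_return_address = ra;
    }

    static inline void cancel_impl(int gtid) { // the former __kmpc_ body
      void *codeptr = tls_return_address; // what a tool callback would receive
      tls_return_address = nullptr;
      std::printf("cancel: T#%d codeptr=%p\n", gtid, codeptr);
    }

    void aux_cancel(int gtid) { cancel_impl(gtid); } // caller already stored

    __attribute__((noinline)) void kmpc_cancel(int gtid) { // compiler-facing
      store_return_address(__builtin_return_address(0));
      cancel_impl(gtid);
    }

    int main() { kmpc_cancel(0); }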
@@ -132,8 +143,9 @@
 Cancellation point for the encountering thread.
 */
-kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
-                                   kmp_int32 cncl_kind) {
+kmp_int32 __forceinline __kmp_cancellationpoint_impl(ident_t *loc_ref,
+                                                     kmp_int32 gtid,
+                                                     kmp_int32 cncl_kind) {
   kmp_info_t *this_thr = __kmp_threads[gtid];
 
   KC_TRACE(10,
@@ -174,7 +186,7 @@
           type = ompt_cancel_sections;
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, type | ompt_cancel_detected,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif
       return 1 /* true */;
@@ -208,7 +220,7 @@
             NULL);
         ompt_callbacks.ompt_callback(ompt_callback_cancel)(
             task_data, ompt_cancel_taskgroup | ompt_cancel_detected,
-            OMPT_GET_RETURN_ADDRESS(0));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
 #endif
       return !!taskgroup->cancel_request;
@@ -227,6 +239,17 @@
   KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
   return 0 /* false */;
 }
+kmp_int32 __kmp_aux_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_int32 cncl_kind) {
+  return __kmp_cancellationpoint_impl(loc_ref, gtid, cncl_kind);
+}
+kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
+                                   kmp_int32 cncl_kind) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_cancellationpoint_impl(loc_ref, gtid, cncl_kind);
+}
 
 /*!
 @ingroup CANCELLATION
Index: openmp/runtime/src/kmp_csupport.cpp
===================================================================
--- openmp/runtime/src/kmp_csupport.cpp
+++ openmp/runtime/src/kmp_csupport.cpp
@@ -480,7 +480,8 @@
 Leave a serialized parallel construct.
 */
-void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
+void __forceinline __kmp_end_serialized_parallel_impl(ident_t *loc,
+                                                      kmp_int32 global_tid) {
   kmp_internal_control_t *top;
   kmp_info_t *this_thr;
   kmp_team_t *serial_team;
@@ -621,6 +622,15 @@
                                    : ompt_state_work_parallel);
 #endif
 }
+void __kmp_aux_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
+  __kmp_end_serialized_parallel_impl(loc, global_tid);
+}
+void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_end_serialized_parallel_impl(loc, global_tid);
+}
 
 /*!
 @ingroup SYNCHRONIZATION
@@ -690,10 +700,9 @@
 Execute a barrier.
 */
-void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
+void __forceinline __kmp_barrier_impl(ident_t *loc, kmp_int32 global_tid) {
   KMP_COUNT_BLOCK(OMP_BARRIER);
   KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
-  __kmp_assert_valid_gtid(global_tid);
 
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();
@@ -707,15 +716,6 @@
     __kmp_check_barrier(global_tid, ct_barrier, loc);
   }
 
-#if OMPT_SUPPORT
-  ompt_frame_t *ompt_frame;
-  if (ompt_enabled.enabled) {
-    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame.ptr == NULL)
-      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
-  }
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
-#endif
   __kmp_threads[global_tid]->th.th_ident = loc;
   // TODO: explicit barrier_wait_id:
   //   this function is called when 'barrier' directive is present or
@@ -725,6 +725,24 @@
   //   4) no sync is required
 
   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
+}
+
+void __kmp_aux_barrier(ident_t *loc, kmp_int32 global_tid) {
+  __kmp_barrier_impl(loc, global_tid);
+}
+
+void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
+  __kmp_assert_valid_gtid(global_tid);
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+  }
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_barrier_impl(loc, global_tid);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
     ompt_frame->enter_frame = ompt_data_none;
@@ -827,7 +845,7 @@
 Start execution of an ordered construct.
 */
-void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
+void __forceinline __kmp_ordered_impl(ident_t *loc, kmp_int32 gtid) {
   int cid = 0;
   kmp_info_t *th;
   KMP_DEBUG_ASSERT(__kmp_init_serial);
@@ -851,7 +869,6 @@
   kmp_team_t *team;
   ompt_wait_id_t lck;
   void *codeptr_ra;
-  OMPT_STORE_RETURN_ADDRESS(gtid);
   if (ompt_enabled.enabled) {
     team = __kmp_team_from_gtid(gtid);
     lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
@@ -892,6 +909,15 @@
   __kmp_itt_ordered_start(gtid);
 #endif /* USE_ITT_BUILD */
 }
+void __kmp_aux_ordered(ident_t *loc, kmp_int32 gtid) {
+  __kmp_ordered_impl(loc, gtid);
+}
+void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_ordered_impl(loc, gtid);
+}
 
 /*!
 @ingroup WORK_SHARING
@@ -900,7 +926,7 @@
 End execution of an ordered construct.
 */
-void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
+void __forceinline __kmp_end_ordered_impl(ident_t *loc, kmp_int32 gtid) {
   int cid = 0;
   kmp_info_t *th;
 
@@ -920,7 +946,6 @@
   __kmp_parallel_dxo(&gtid, &cid, loc);
 
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
         ompt_mutex_ordered,
@@ -930,6 +955,15 @@
   }
 #endif
 }
+void __kmp_aux_end_ordered(ident_t *loc, kmp_int32 gtid) {
+  __kmp_end_ordered_impl(loc, gtid);
+}
+void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_end_ordered_impl(loc, gtid);
+}
 
 #if KMP_USE_DYNAMIC_LOCK
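Note: __kmpc_ordered and __kmpc_end_ordered previously called
OMPT_STORE_RETURN_ADDRESS in mid-body; after the split the store happens
exactly once, at the outermost entry, where __builtin_return_address(0) still
points into user code. A store issued one call level further down would record
an address inside libomp instead. A small sketch of that hazard, reusing the
hypothetical slot from the earlier note:

    #include <cstdio>

    static thread_local void *tls_return_address = nullptr; // hypothetical slot

    static void store_if_empty(void *ra) {
      if (!tls_return_address)
        tls_return_address = ra;
    }

    __attribute__((noinline)) void impl() {
      // If the entry had not stored first, this would capture impl's caller,
      // i.e. an address inside the runtime, and misattribute the event.
      store_if_empty(__builtin_return_address(0));
      std::printf("codeptr=%p\n", tls_return_address);
      tls_return_address = nullptr;
    }

    __attribute__((noinline)) void entry() {
      store_if_empty(__builtin_return_address(0)); // user-code address wins
      impl();
    }

    int main() { entry(); }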
@@ -1135,13 +1169,10 @@
 Enter code protected by a `critical` construct.
 This function blocks until the executing thread can enter the critical section.
 */
-void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
-                     kmp_critical_name *crit) {
+void __forceinline __kmp_critical_impl(ident_t *loc, kmp_int32 global_tid,
+                                       kmp_critical_name *crit) {
 #if KMP_USE_DYNAMIC_LOCK
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
-#endif // OMPT_SUPPORT
-  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
+  __kmp_aux_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
 #else
   KMP_COUNT_BLOCK(OMP_CRITICAL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
@@ -1184,7 +1215,6 @@
     __kmp_itt_critical_acquiring(lck);
 #endif /* USE_ITT_BUILD */
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-    OMPT_STORE_RETURN_ADDRESS(gtid);
     void *codeptr_ra = NULL;
     if (ompt_enabled.enabled) {
       ti = __kmp_threads[global_tid]->th.ompt_thread_info;
@@ -1229,6 +1259,19 @@
 #endif // KMP_USE_DYNAMIC_LOCK
 }
 
+void __kmp_aux_critical(ident_t *loc, kmp_int32 global_tid,
+                        kmp_critical_name *crit) {
+  __kmp_critical_impl(loc, global_tid, crit);
+}
+
+void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
+                     kmp_critical_name *crit) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif // OMPT_SUPPORT
+  __kmp_critical_impl(loc, global_tid, crit);
+}
+
 #if KMP_USE_DYNAMIC_LOCK
 
 // Converts the given hint to an internal lock implementation
@@ -1360,8 +1403,10 @@
 thread can enter the critical section unless the hint suggests use of
 speculative execution and the hardware supports it.
 */
-void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
-                               kmp_critical_name *crit, uint32_t hint) {
+void __forceinline __kmp_critical_with_hint_impl(ident_t *loc,
+                                                 kmp_int32 global_tid,
+                                                 kmp_critical_name *crit,
+                                                 uint32_t hint) {
   KMP_COUNT_BLOCK(OMP_CRITICAL);
   kmp_user_lock_p lck;
 #if OMPT_SUPPORT && OMPT_OPTIONAL
@@ -1369,8 +1414,6 @@
   ompt_thread_info_t ti;
   // This is the case, if called from __kmpc_critical:
   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
-  if (!codeptr)
-    codeptr = OMPT_GET_RETURN_ADDRESS(0);
 #endif
 
   KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
@@ -1481,6 +1524,18 @@
   KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
 } // __kmpc_critical_with_hint
 
+void __kmp_aux_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
+                                  kmp_critical_name *crit, uint32_t hint) {
+  __kmp_critical_with_hint_impl(loc, global_tid, crit, hint);
+}
+void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
+                               kmp_critical_name *crit, uint32_t hint) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_critical_with_hint_impl(loc, global_tid, crit, hint);
+}
+
 #endif // KMP_USE_DYNAMIC_LOCK
 
 /*!
 @ingroup SYNCHRONIZATION
 @param loc source location information
@@ -1492,8 +1547,8 @@
 Leave a critical section, releasing any lock that was held during its
 execution.
 */
-void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
-                         kmp_critical_name *crit) {
+void __forceinline __kmp_end_critical_impl(ident_t *loc, kmp_int32 global_tid,
+                                           kmp_critical_name *crit) {
   kmp_user_lock_p lck;
 
   KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
@@ -1567,7 +1622,6 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   /* OMPT release event triggers after lock is released; place here to trigger
    * for all #if branches */
-  OMPT_STORE_RETURN_ADDRESS(global_tid);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
         ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
@@ -1579,6 +1633,18 @@
 
   KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
 }
+void __kmp_aux_end_critical(ident_t *loc, kmp_int32 global_tid,
+                            kmp_critical_name *crit) {
+  __kmp_end_critical_impl(loc, global_tid, crit);
+}
+void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
+                         kmp_critical_name *crit) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(global_tid);
+#endif
+  __kmp_end_critical_impl(loc, global_tid, crit);
+}
+
 /*!
 @ingroup SYNCHRONIZATION
 @param loc source location information
@@ -3864,8 +3930,9 @@
 Expect compiler send us inclusive bounds,
 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
 */
-void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
-                          const struct kmp_dim *dims) {
+void __forceinline __kmp_doacross_init_impl(ident_t *loc, int gtid,
+                                            int num_dims,
+                                            const struct kmp_dim *dims) {
   __kmp_assert_valid_gtid(gtid);
   int j, idx;
   kmp_int64 last, trace_count;
@@ -3984,8 +4051,17 @@
   // touch shared buffer on each iteration
   KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
 }
+void __kmp_aux_doacross_init(ident_t *loc, int gtid, int num_dims,
+                             const struct kmp_dim *dims) {
+  __kmp_doacross_init_impl(loc, gtid, num_dims, dims);
+}
+void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
+                          const struct kmp_dim *dims) {
+  __kmp_doacross_init_impl(loc, gtid, num_dims, dims);
+}
 
-void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
+void __forceinline __kmp_doacross_wait_impl(ident_t *loc, int gtid,
+                                            const kmp_int64 *vec) {
   __kmp_assert_valid_gtid(gtid);
   kmp_int32 shft, num_dims, i;
   kmp_uint32 flag;
@@ -4096,7 +4172,15 @@
                 gtid, (iter_number << 5) + shft));
 }
 
-void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
+void __kmp_aux_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_wait_impl(loc, gtid, vec);
+}
+void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_wait_impl(loc, gtid, vec);
+}
+
+void __forceinline __kmp_doacross_post_impl(ident_t *loc, int gtid,
+                                            const kmp_int64 *vec) {
   __kmp_assert_valid_gtid(gtid);
   kmp_int32 shft, num_dims, i;
   kmp_uint32 flag;
@@ -4168,7 +4252,14 @@
                 (iter_number << 5) + shft));
 }
 
-void __kmpc_doacross_fini(ident_t *loc, int gtid) {
+void __kmp_aux_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_post_impl(loc, gtid, vec);
+}
+void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
+  __kmp_doacross_post_impl(loc, gtid, vec);
+}
+
+void __forceinline __kmp_doacross_fini_impl(ident_t *loc, int gtid) {
   __kmp_assert_valid_gtid(gtid);
   kmp_int32 num_done;
   kmp_info_t *th = __kmp_threads[gtid];
@@ -4202,6 +4293,12 @@
   pr_buf->th_doacross_info = NULL;
   KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
 }
+void __kmp_aux_doacross_fini(ident_t *loc, int gtid) {
+  __kmp_doacross_fini_impl(loc, gtid);
+}
+void __kmpc_doacross_fini(ident_t *loc, int gtid) {
+  __kmp_doacross_fini_impl(loc, gtid);
+}
 
 /* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
Index: openmp/runtime/src/kmp_dispatch.cpp
===================================================================
--- openmp/runtime/src/kmp_dispatch.cpp
+++ openmp/runtime/src/kmp_dispatch.cpp
@@ -2418,15 +2418,12 @@
 */
 int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
-                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                        OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                        );
+  );
 }
 
 /*!
@@ -2435,15 +2432,12 @@
 int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                             kmp_int32 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                          ,
-                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                         OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                         );
+  );
 }
 
 /*!
@@ -2451,15 +2445,12 @@
 */
 int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
-                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                        OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                        );
+  );
 }
 
 /*!
@@ -2468,15 +2459,12 @@
 int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                             kmp_int64 *p_st) {
-#if OMPT_SUPPORT && OMPT_OPTIONAL
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
   return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
 #if OMPT_SUPPORT && OMPT_OPTIONAL
                                          ,
-                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+                                         OMPT_GET_RETURN_ADDRESS(0)
 #endif
-                                         );
+  );
 }
 
 /*!
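Note on GET vs. LOAD in the hunks above: __kmpc_dispatch_next_* are themselves
the outermost, compiler-called entries, so they can take the return address
inline with OMPT_GET_RETURN_ADDRESS(0); the __kmp_aux_dispatch_next_* twins
added below are reached through GOMP wrappers that already did
OMPT_STORE_RETURN_ADDRESS(gtid), so they fetch the stored value with
OMPT_LOAD_RETURN_ADDRESS(gtid). The load consumes the slot, as the
__ompt_load_return_address change later in this patch shows; a one-screen
model with hypothetical names:

    #include <cassert>

    static thread_local void *tls_return_address = nullptr; // hypothetical slot

    void *load_return_address() { // one store feeds exactly one load
      void *ra = tls_return_address;
      tls_return_address = nullptr;
      return ra;
    }

    int main() {
      int marker = 0;
      tls_return_address = &marker; // as if a GOMP wrapper had stored it
      assert(load_return_address() == &marker);
      assert(load_return_address() == nullptr); // consumed: second read empty
    }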
@@ -2635,6 +2623,47 @@
   __kmp_dispatch_finish_chunk(gtid, loc);
 }
 
+int __kmp_aux_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                              kmp_int32 *p_lb, kmp_int32 *p_ub,
+                              kmp_int32 *p_st) {
+  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                        ,
+                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+int __kmp_aux_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                               kmp_uint32 *p_lb, kmp_uint32 *p_ub,
+                               kmp_int32 *p_st) {
+  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                         ,
+                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+int __kmp_aux_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                              kmp_int64 *p_lb, kmp_int64 *p_ub,
+                              kmp_int64 *p_st) {
+  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                        ,
+                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+int __kmp_aux_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
+                               kmp_uint64 *p_lb, kmp_uint64 *p_ub,
+                               kmp_int64 *p_st) {
+  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+                                         ,
+                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
+#endif
+  );
+}
+
 #endif /* KMP_GOMP_COMPAT */
 
 /* ------------------------------------------------------------------------ */
Index: openmp/runtime/src/kmp_gsupport.cpp
===================================================================
--- openmp/runtime/src/kmp_gsupport.cpp
+++ openmp/runtime/src/kmp_gsupport.cpp
@@ -105,7 +105,7 @@
   }
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_barrier(&loc, gtid);
+  __kmp_aux_barrier(&loc, gtid);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
     ompt_frame->enter_frame = ompt_data_none;
@@ -131,7 +131,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
+  __kmp_aux_critical(&loc, gtid, __kmp_unnamed_critical_addr);
 }
 
 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
@@ -141,21 +141,21 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
+  __kmp_aux_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
 }
 
 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_critical_name_start");
   KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
-  __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
+  __kmp_aux_critical(&loc, gtid, (kmp_critical_name *)pptr);
 }
 
 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) {
   int gtid = __kmp_get_gtid();
   MKLOC(loc, "GOMP_critical_name_end");
   KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
-  __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
+  __kmp_aux_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
 }
 
 // The Gnu codegen tries to use locked operations to perform atomic updates
@@ -310,7 +310,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_ordered(&loc, gtid);
+  __kmp_aux_ordered(&loc, gtid);
 }
 
 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
@@ -320,7 +320,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_end_ordered(&loc, gtid);
+  __kmp_aux_end_ordered(&loc, gtid);
 }
 
 // Dispatch macro defs
@@ -331,16 +331,16 @@
 
 #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
-#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
+#define KMP_DISPATCH_NEXT __kmp_aux_dispatch_next_4
 #else
 #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
-#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
+#define KMP_DISPATCH_NEXT __kmp_aux_dispatch_next_8
 #endif /* KMP_ARCH_X86 */
 
 #define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
 #define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
-#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
+#define KMP_DISPATCH_NEXT_ULL __kmp_aux_dispatch_next_8u
 
 // The parallel construct
 
@@ -642,7 +642,7 @@
 
 #define KMP_DOACROSS_FINI(status, gtid)                                       \
   if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) {    \
-    __kmpc_doacross_fini(NULL, gtid);                                         \
+    __kmp_aux_doacross_fini(NULL, gtid);                                      \
   }
 
 #define LOOP_NEXT(func, fini_code)                                            \
@@ -728,7 +728,7 @@
       dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
     lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -775,7 +775,7 @@
      dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
     lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -1018,7 +1018,7 @@
       dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
    lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -1067,7 +1067,7 @@
       dims[i].up = counts[i] - 1;                                             \
       dims[i].st = 1;                                                         \
     }                                                                         \
-    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                     \
+    __kmp_aux_doacross_init(&loc, gtid, (int)ncounts, dims);                  \
     lb = 0;                                                                   \
     ub = counts[0];                                                           \
     str = 1;                                                                  \
@@ -1245,9 +1245,9 @@
       kmp_depend_info_t dep_list[ndeps];
       for (kmp_int32 i = 0; i < ndeps; i++)
         dep_list[i] = gomp_depends.get_kmp_depend(i);
-      __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL);
+      __kmp_aux_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL);
     } else {
-      __kmpc_omp_task(&loc, gtid, task);
+      __kmp_aux_omp_task(&loc, gtid, task);
     }
   } else {
 #if OMPT_SUPPORT
@@ -1263,7 +1263,6 @@
       thread->th.ompt_thread_info.state = ompt_state_work_parallel;
       taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     }
-    OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
     if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
       KMP_ASSERT(depend);
@@ -1272,12 +1271,24 @@
       kmp_depend_info_t dep_list[ndeps];
       for (kmp_int32 i = 0; i < ndeps; i++)
         dep_list[i] = gomp_depends.get_kmp_depend(i);
-      __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
+      __kmp_aux_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
     }
-    __kmpc_omp_task_begin_if0(&loc, gtid, task);
+#if OMPT_SUPPORT
+    if (UNLIKELY(ompt_enabled.enabled)) {
+      __kmp_omp_task_begin_if0_ompt(&loc, gtid, task, OMPT_GET_FRAME_ADDRESS(0),
+                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
+    } else
+#endif
+      __kmp_omp_task_begin_if0_template<false>(&loc, gtid, task, NULL, NULL);
     func(data);
-    __kmpc_omp_task_complete_if0(&loc, gtid, task);
+#if OMPT_SUPPORT
+    if (UNLIKELY(ompt_enabled.enabled)) {
+      __kmp_omp_task_complete_if0_ompt(&loc, gtid, task);
+      return;
+    }
+#endif
+    __kmp_omp_task_complete_if0_template<false>(&loc, gtid, task);
 
 #if OMPT_SUPPORT
     if (ompt_enabled.enabled) {
@@ -1299,13 +1310,15 @@
   MKLOC(loc, "GOMP_taskwait");
   int gtid = __kmp_entry_gtid();
 
-#if OMPT_SUPPORT
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-
   KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
 
-  __kmpc_omp_taskwait(&loc, gtid);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    __kmp_omp_taskwait_ompt(&loc, gtid, OMPT_GET_FRAME_ADDRESS(0),
+                            OMPT_GET_RETURN_ADDRESS(0));
+  } else
+#endif
+    __kmp_omp_taskwait_template<false>(&loc, gtid, NULL, NULL);
 
   KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
 }
@@ -1563,7 +1576,7 @@
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
 
-  __kmpc_taskgroup(&loc, gtid);
+  __kmp_aux_taskgroup(&loc, gtid);
 
   return;
 }
@@ -1577,7 +1590,7 @@
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
 
-  __kmpc_end_taskgroup(&loc, gtid);
+  __kmp_aux_end_taskgroup(&loc, gtid);
 
   return;
 }
@@ -1607,7 +1620,10 @@
   MKLOC(loc, "GOMP_cancellation_point");
   KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which));
   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
-  return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_aux_cancellationpoint(&loc, gtid, cncl_kind);
 }
 
 // Return true if cancellation should take place, false otherwise
@@ -1617,11 +1633,14 @@
   KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which,
                 (int)do_cancel));
   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
 
   if (do_cancel == FALSE) {
-    return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
+    return __kmp_aux_cancellationpoint(&loc, gtid, cncl_kind);
   } else {
-    return __kmpc_cancel(&loc, gtid, cncl_kind);
+    return __kmp_aux_cancel(&loc, gtid, cncl_kind);
   }
 }
 
@@ -1774,9 +1793,9 @@
   loop_bounds = (T *)task->shareds;
   loop_bounds[0] = start;
   loop_bounds[1] = end + (up ? -1 : 1);
-  __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
-                  (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, nogroup,
-                  sched, (kmp_uint64)num_tasks, (void *)task_dup);
+  __kmp_aux_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
+                     (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, nogroup,
+                     sched, (kmp_uint64)num_tasks, (void *)task_dup);
 }
 
 // 4 byte version of GOMP_doacross_post
@@ -1795,7 +1814,7 @@
   for (kmp_int64 i = 0; i < num_dims; ++i) {
     vec[i] = (kmp_int64)count[i];
   }
-  __kmpc_doacross_post(&loc, gtid, vec);
+  __kmp_aux_doacross_post(&loc, gtid, vec);
   __kmp_thread_free(th, vec);
 }
 
 // 8 byte versions of GOMP_doacross_post
@@ -1805,7 +1824,7 @@
 template <> void __kmp_GOMP_doacross_post(long *count) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_doacross_post");
-  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
+  __kmp_aux_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
 }
 
 template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {
@@ -1820,7 +1839,7 @@
     T item = va_arg(args, T);
     vec[i] = (kmp_int64)item;
   }
-  __kmpc_doacross_wait(&loc, gtid, vec);
+  __kmp_aux_doacross_wait(&loc, gtid, vec);
   __kmp_thread_free(th, vec);
   return;
 }
@@ -1862,7 +1881,7 @@
     unsigned long long *count) {
   int gtid = __kmp_entry_gtid();
   MKLOC(loc, "GOMP_doacross_ull_post");
-  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
+  __kmp_aux_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
 }
 
 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
@@ -1904,7 +1923,7 @@
 #if OMPT_SUPPORT
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-  __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
+  __kmp_aux_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
 
   KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid));
 }
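Note: the GOMP task and taskwait paths above select the OMPT flavor at the
call site instead of branching inside the worker: the <true> instantiation of
__kmp_omp_taskwait_template (reached via the OMPT_NOINLINE *_ompt wrappers)
carries frame and return addresses, while the <false> instantiation compiles
the bookkeeping away. A self-contained C++ sketch of that compile-time split,
with hypothetical names and GCC/Clang builtins assumed:

    #include <cstdio>

    template <bool ompt> // mirrors the <bool ompt> templates in kmp_tasking.cpp
    static int taskwait_template(void *frame_address, void *return_address) {
      if (ompt) // constant-folded away in the <false> instantiation
        std::printf("tool: frame=%p ra=%p\n", frame_address, return_address);
      return 0; // ... the actual wait logic would live here ...
    }

    __attribute__((noinline)) int taskwait_entry(bool tool_enabled) {
      if (tool_enabled)
        return taskwait_template<true>(__builtin_frame_address(0),
                                       __builtin_return_address(0));
      return taskwait_template<false>(nullptr, nullptr);
    }

    int main() { return taskwait_entry(true); }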
Index: openmp/runtime/src/kmp_runtime.cpp
===================================================================
--- openmp/runtime/src/kmp_runtime.cpp
+++ openmp/runtime/src/kmp_runtime.cpp
@@ -1513,7 +1513,7 @@
     // Increment our nested depth levels, but not increase the serialization
     if (parent_team == master_th->th.th_serial_team) {
       // AC: we are in serialized parallel
-      __kmpc_serialized_parallel(loc, gtid);
+      __kmp_serialized_parallel(loc, gtid);
       KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
 
       if (call_context == fork_context_gnu) {
@@ -1743,7 +1743,7 @@
       KA_TRACE(20,
                ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
 
-      __kmpc_serialized_parallel(loc, gtid);
+      __kmp_serialized_parallel(loc, gtid);
 
       if (call_context == fork_context_intel) {
         /* TODO this sucks, use the compiler itself to pass args! :) */
@@ -2350,7 +2350,7 @@
       team->t.t_serialized++;
     }
   }
-  __kmpc_end_serialized_parallel(loc, gtid);
+  __kmp_aux_end_serialized_parallel(loc, gtid);
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
Index: openmp/runtime/src/kmp_taskdeps.cpp
===================================================================
--- openmp/runtime/src/kmp_taskdeps.cpp
+++ openmp/runtime/src/kmp_taskdeps.cpp
@@ -505,11 +505,10 @@
 Schedule a non-thread-switchable task with dependences for execution
 */
-kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
-                                    kmp_task_t *new_task, kmp_int32 ndeps,
-                                    kmp_depend_info_t *dep_list,
-                                    kmp_int32 ndeps_noalias,
-                                    kmp_depend_info_t *noalias_dep_list) {
+kmp_int32 __forceinline __kmp_omp_task_with_deps_impl(
+    ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
+    kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
+    kmp_depend_info_t *noalias_dep_list) {
   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
 
   KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
@@ -530,7 +529,7 @@
             current_task ? &(current_task->ompt_task_info.frame) : NULL,
             &(new_taskdata->ompt_task_info.task_data),
             ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
-            OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
+            OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
       new_taskdata->ompt_task_info.frame.enter_frame.ptr =
           OMPT_GET_FRAME_ADDRESS(0);
@@ -636,6 +635,25 @@
 #endif
   return ret;
 }
+kmp_int32 __kmp_aux_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *new_task, kmp_int32 ndeps,
+                                       kmp_depend_info_t *dep_list,
+                                       kmp_int32 ndeps_noalias,
+                                       kmp_depend_info_t *noalias_dep_list) {
+  return __kmp_omp_task_with_deps_impl(loc_ref, gtid, new_task, ndeps, dep_list,
+                                       ndeps_noalias, noalias_dep_list);
+}
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                    kmp_task_t *new_task, kmp_int32 ndeps,
+                                    kmp_depend_info_t *dep_list,
+                                    kmp_int32 ndeps_noalias,
+                                    kmp_depend_info_t *noalias_dep_list) {
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_omp_task_with_deps_impl(loc_ref, gtid, new_task, ndeps, dep_list,
+                                       ndeps_noalias, noalias_dep_list);
+}
 
 #if OMPT_SUPPORT
 void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
@@ -665,9 +683,10 @@
 Blocks the current task until all specifies dependencies have been fulfilled.
 */
-void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
-                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
-                          kmp_depend_info_t *noalias_dep_list) {
+void __kmp_omp_wait_deps_impl(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                              kmp_depend_info_t *dep_list,
+                              kmp_int32 ndeps_noalias,
+                              kmp_depend_info_t *noalias_dep_list) {
   KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));
 
   if (ndeps == 0 && ndeps_noalias == 0) {
@@ -699,7 +718,7 @@
           current_task ? &(current_task->ompt_task_info.frame) : NULL,
           taskwait_task_data,
           ompt_task_explicit | ompt_task_undeferred | ompt_task_mergeable, 1,
-          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
+          OMPT_LOAD_RETURN_ADDRESS(gtid));
     }
   }
 
@@ -799,3 +818,19 @@
   KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
                 gtid, loc_ref));
 }
+void __kmp_aux_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                             kmp_depend_info_t *dep_list,
+                             kmp_int32 ndeps_noalias,
+                             kmp_depend_info_t *noalias_dep_list) {
+  return __kmp_omp_wait_deps_impl(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
+                                  noalias_dep_list);
+}
+void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
+                          kmp_depend_info_t *noalias_dep_list) {
+#if OMPT_SUPPORT
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_omp_wait_deps_impl(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
+                                  noalias_dep_list);
+}
Index: openmp/runtime/src/kmp_tasking.cpp
===================================================================
--- openmp/runtime/src/kmp_tasking.cpp
+++ openmp/runtime/src/kmp_tasking.cpp
@@ -595,10 +595,9 @@
 #endif
 
 template <bool ompt>
-static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
-                                               kmp_task_t *task,
-                                               void *frame_address,
-                                               void *return_address) {
+void __kmp_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                       kmp_task_t *task, void *frame_address,
+                                       void *return_address) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
 
@@ -646,12 +645,11 @@
 
 #if OMPT_SUPPORT
 OMPT_NOINLINE
-static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
-                                           kmp_task_t *task,
-                                           void *frame_address,
-                                           void *return_address) {
-  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
-                                           return_address);
+void __kmp_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                   kmp_task_t *task, void *frame_address,
+                                   void *return_address) {
+  __kmp_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
+                                          return_address);
 }
 #endif // OMPT_SUPPORT
 
@@ -666,13 +664,13 @@
 #if OMPT_SUPPORT
   if (UNLIKELY(ompt_enabled.enabled)) {
     OMPT_STORE_RETURN_ADDRESS(gtid);
-    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
-                                   OMPT_GET_FRAME_ADDRESS(1),
-                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
+    __kmp_omp_task_begin_if0_ompt(loc_ref, gtid, task,
+                                  OMPT_GET_FRAME_ADDRESS(1),
+                                  OMPT_LOAD_RETURN_ADDRESS(gtid));
     return;
   }
 #endif
-  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
+  __kmp_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
 }
 
 #ifdef TASK_UNUSED
@@ -979,9 +977,8 @@
 }
 
 template <bool ompt>
-static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
-                                                  kmp_int32 gtid,
-                                                  kmp_task_t *task) {
+void __kmp_omp_task_complete_if0_template(ident_t *loc_ref, kmp_int32 gtid,
+                                          kmp_task_t *task) {
   KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
   __kmp_assert_valid_gtid(gtid);
@@ -1005,9 +1002,9 @@
 
 #if OMPT_SUPPORT
 OMPT_NOINLINE
-void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
-                                       kmp_task_t *task) {
-  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
+void __kmp_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                      kmp_task_t *task) {
+  __kmp_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
 }
 #endif // OMPT_SUPPORT
 
@@ -1020,11 +1017,11 @@
                                   kmp_task_t *task) {
 #if OMPT_SUPPORT
   if (UNLIKELY(ompt_enabled.enabled)) {
-    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
+    __kmp_omp_task_complete_if0_ompt(loc_ref, gtid, task);
     return;
   }
 #endif
-  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
+  __kmp_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
 }
 
 #ifdef TASK_UNUSED
@@ -1706,8 +1703,8 @@
 // be resumed later.
 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
 // resumed later.
-kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
-                          kmp_task_t *new_task) {
+kmp_int32 __forceinline __kmp_omp_task_impl(ident_t *loc_ref, kmp_int32 gtid,
+                                            kmp_task_t *new_task) {
   kmp_int32 res;
   KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
 
@@ -1721,8 +1718,8 @@
 #if OMPT_SUPPORT
   kmp_taskdata_t *parent = NULL;
   if (UNLIKELY(ompt_enabled.enabled)) {
+    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (!new_taskdata->td_flags.started) {
-      OMPT_STORE_RETURN_ADDRESS(gtid);
       parent = new_taskdata->td_parent;
       if (!parent->ompt_task_info.frame.enter_frame.ptr) {
         parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
@@ -1734,7 +1731,7 @@
             parent ? &(parent->ompt_task_info.frame) : NULL,
             &(new_taskdata->ompt_task_info.task_data),
             ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
-            OMPT_LOAD_RETURN_ADDRESS(gtid));
+            codeptr);
       }
     } else {
       // We are scheduling the continuation of an UNTIED task.
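Note: in __kmp_omp_task_impl above, the return address is now read into a
local (void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);) at the top of the OMPT
block rather than at the callback itself. Since the load clears the per-thread
slot, reading it early pins the value for whichever branch ends up firing the
task-create callback. A short sketch with hypothetical names, using the same
slot model as the earlier notes:

    #include <cstdio>

    static thread_local void *tls_return_address = nullptr; // hypothetical slot

    static void *load_return_address() { // consume-once
      void *ra = tls_return_address;
      tls_return_address = nullptr;
      return ra;
    }

    void task_entry_body(bool started) {
      void *codeptr = load_return_address(); // hoisted single read
      if (!started) {
        // ... frame bookkeeping that might itself store/load elsewhere ...
        std::printf("ompt_callback_task_create codeptr=%p\n", codeptr);
      }
    }

    int main() {
      int marker = 0;
      tls_return_address = &marker;
      task_entry_body(false);
    }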
@@ -1759,6 +1756,17 @@
 #endif
   return res;
 }
+kmp_int32 __kmp_aux_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+                             kmp_task_t *new_task) {
+  return __kmp_omp_task_impl(loc_ref, gtid, new_task);
+}
+kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+                          kmp_task_t *new_task) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  return __kmp_omp_task_impl(loc_ref, gtid, new_task);
+}
 
 // __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
 // a taskloop task with the correct OMPT return address
@@ -1816,9 +1824,9 @@
 }
 
 template <bool ompt>
-static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
-                                              void *frame_address,
-                                              void *return_address) {
+kmp_int32 __kmp_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
+                                      void *frame_address,
+                                      void *return_address) {
   kmp_taskdata_t *taskdata;
   kmp_info_t *thread;
   int thread_finished = FALSE;
@@ -1923,11 +1931,10 @@
 
 #if OMPT_SUPPORT && OMPT_OPTIONAL
 OMPT_NOINLINE
-static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
-                                          void *frame_address,
-                                          void *return_address) {
-  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
-                                            return_address);
+kmp_int32 __kmp_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
+                                  void *frame_address, void *return_address) {
+  return __kmp_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
+                                           return_address);
 }
 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
 
@@ -1936,12 +1943,11 @@
 kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (UNLIKELY(ompt_enabled.enabled)) {
-    OMPT_STORE_RETURN_ADDRESS(gtid);
-    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
-                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
+    return __kmp_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
+                                   OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
-  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
+  return __kmp_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
 }
 
 // __kmpc_omp_taskyield: switch to a different task
@@ -2336,7 +2342,7 @@
   __kmp_assert_valid_gtid(gtid);
   kmp_info_t *thr = __kmp_threads[gtid];
   kmp_int32 nth = thr->th.th_team_nproc;
-  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
+  __kmp_aux_taskgroup(loc, gtid); // form new taskgroup first
   if (nth == 1) {
     KA_TRACE(10,
              ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
@@ -2424,11 +2430,11 @@
 Finalize task reduction for a parallel or worksharing.
 */
 void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
-  __kmpc_end_taskgroup(loc, gtid);
+  __kmp_aux_end_taskgroup(loc, gtid);
 }
 
 // __kmpc_taskgroup: Start a new taskgroup
-void __kmpc_taskgroup(ident_t *loc, int gtid) {
+void __forceinline __kmp_taskgroup_impl(ident_t *loc, int gtid) {
   __kmp_assert_valid_gtid(gtid);
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = thread->th.th_current_task;
@@ -2445,8 +2451,6 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
-    if (!codeptr)
-      codeptr = OMPT_GET_RETURN_ADDRESS(0);
     kmp_team_t *team = thread->th.th_team;
     ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
     // FIXME: I think this is wrong for lwt!
@@ -2458,10 +2462,19 @@
   }
 #endif
 }
+void __kmp_aux_taskgroup(ident_t *loc, int gtid) {
+  __kmp_taskgroup_impl(loc, gtid);
+}
+void __kmpc_taskgroup(ident_t *loc, int gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_taskgroup_impl(loc, gtid);
+}
 
 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
 // and its descendants are complete
-void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
+void __kmp_end_taskgroup_impl(ident_t *loc, int gtid) {
   __kmp_assert_valid_gtid(gtid);
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = thread->th.th_current_task;
@@ -2479,8 +2492,6 @@
     // FIXME: I think this is wrong for lwt!
     my_parallel_data = team->t.ompt_team_info.parallel_data;
     codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
-    if (!codeptr)
-      codeptr = OMPT_GET_RETURN_ADDRESS(0);
   }
 #endif
 
@@ -2602,6 +2613,15 @@
   }
 #endif
 }
+void __kmp_aux_end_taskgroup(ident_t *loc, int gtid) {
+  __kmp_end_taskgroup_impl(loc, gtid);
+}
+void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+  __kmp_end_taskgroup_impl(loc, gtid);
+}
 
 // __kmp_remove_my_task: remove a task from my own deque
 static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
@@ -4442,9 +4462,10 @@
 Execute the taskloop construct.
 */
-void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
-                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
-                     int sched, kmp_uint64 grainsize, void *task_dup) {
+void __kmp_taskloop_impl(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                         kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                         int nogroup, int sched, kmp_uint64 grainsize,
+                         void *task_dup) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   KMP_DEBUG_ASSERT(task != NULL);
   __kmp_assert_valid_gtid(gtid);
@@ -4452,7 +4473,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-    __kmpc_taskgroup(loc, gtid);
+    __kmp_aux_taskgroup(loc, gtid);
   }
 
 // =========================================================================
@@ -4589,7 +4610,20 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
-    __kmpc_end_taskgroup(loc, gtid);
+    __kmp_aux_end_taskgroup(loc, gtid);
   }
   KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
 }
+void __kmp_aux_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                        kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                        int nogroup, int sched, kmp_uint64 grainsize,
+                        void *task_dup) {
+  __kmp_taskloop_impl(loc, gtid, task, if_val, lb, ub, st, nogroup, sched,
+                      grainsize, task_dup);
+}
+void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+                     int sched, kmp_uint64 grainsize, void *task_dup) {
+  __kmp_taskloop_impl(loc, gtid, task, if_val, lb, ub, st, nogroup, sched,
+                      grainsize, task_dup);
+}
Index: openmp/runtime/src/ompt-general.cpp
===================================================================
--- openmp/runtime/src/ompt-general.cpp
+++ openmp/runtime/src/ompt-general.cpp
@@ -84,7 +84,7 @@
  * global variables
  ****************************************************************************/
 
-ompt_callbacks_active_t ompt_enabled;
+ompt_callbacks_active_t ompt_enabled{0};
 
 ompt_state_info_t ompt_state_info[] = {
 #define ompt_state_macro(state, code) {#state, state},
@@ -378,6 +378,7 @@
     return;
   ompt_pre_initialized = 1;
+  ompt_enabled.initialized = 1;
 
   //--------------------------------------------------
   // Use a tool iff a tool is enabled and available.
@@ -413,6 +414,7 @@
   switch (tool_setting) {
   case omp_tool_disabled:
     OMPT_VERBOSE_INIT_PRINT("OMP tool disabled. \n");
+    ompt_enabled.enabled = 0;
     break;
 
   case omp_tool_unset:
Index: openmp/runtime/src/ompt-internal.h
===================================================================
--- openmp/runtime/src/ompt-internal.h
+++ openmp/runtime/src/ompt-internal.h
@@ -37,6 +37,7 @@
 
 typedef struct ompt_callbacks_active_s {
   unsigned int enabled : 1;
+  unsigned int initialized : 1;
 #define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
 
   FOREACH_OMPT_EVENT(ompt_event_macro)
Index: openmp/runtime/src/ompt-specific.h
===================================================================
--- openmp/runtime/src/ompt-specific.h
+++ openmp/runtime/src/ompt-specific.h
@@ -69,6 +69,7 @@
 #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
 
 inline void *__ompt_load_return_address(int gtid) {
+  if (!ompt_enabled.enabled || gtid < 0) return NULL;
   kmp_info_t *thr = __kmp_threads[gtid];
   void *return_address = thr->th.ompt_thread_info.return_address;
   thr->th.ompt_thread_info.return_address = NULL;
@@ -142,7 +143,10 @@
 
 public:
   OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Gtid(Gtid) {
-    if (ompt_enabled.enabled && Gtid >= 0 && __kmp_threads[Gtid] &&
+    KMP_ASSERT(!ompt_enabled.enabled || Gtid < 0 ||
+               !__kmp_threads[Gtid]->th.ompt_thread_info.return_address);
+    if ((ompt_enabled.enabled || !ompt_enabled.initialized) && Gtid >= 0 &&
+        __kmp_threads[Gtid] &&
         !__kmp_threads[Gtid]->th.ompt_thread_info.return_address) {
       SetAddress = true;
       __kmp_threads[Gtid]->th.ompt_thread_info.return_address = ReturnAddress;
Index: openmp/runtime/test/ompt/cancel/cancel_parallel.c
===================================================================
--- openmp/runtime/test/ompt/cancel/cancel_parallel.c
+++ openmp/runtime/test/ompt/cancel/cancel_parallel.c
@@ -1,7 +1,5 @@
 // RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
 // REQUIRES: ompt
-// Current GOMP interface implementation does not support cancellation
-// XFAIL: gcc
 
 #include "callback.h"
 #include "omp.h"
Index: openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
===================================================================
--- openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
+++ openmp/runtime/test/ompt/cancel/cancel_taskgroup.c
@@ -1,8 +1,8 @@
-// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
-// REQUIRES: ompt
+// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
 // UNSUPPORTED: clang-3, clang-4.0.0
-// Current GOMP interface implementation does not support cancellation; icc 16 has a bug
-// XFAIL: gcc, icc-16
+// icc 16 has a bug:
+// UNSUPPORTED: icc-16
 
 #include "callback.h"
 #include <unistd.h>
@@ -11,8 +11,7 @@
 int main()
 {
   int condition=0;
-  #pragma omp parallel num_threads(2)
-  {}
+  int nthreads = omp_get_max_threads();
 
   print_frame(0);
   #pragma omp parallel num_threads(2)
@@ -66,11 +65,14 @@
   // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin'
   // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
 
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_masked_begin:
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
   // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]],
-  // CHECK-SAME: task_id=[[PARENT_TASK_ID:[0-9]+]],
   // CHECK-SAME: codeptr_ra={{0x[0-f]*}}
 
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]],
+  // CHECK-SAME: task_id=[[PARENT_TASK_ID:[0-9]+]],
+
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no
Index: openmp/runtime/test/ompt/tasks/serialized.c
===================================================================
--- openmp/runtime/test/ompt/tasks/serialized.c
+++ openmp/runtime/test/ompt/tasks/serialized.c
@@ -28,7 +28,11 @@
     print_frame(0);
   } else {
     // The exit frame must be our parent!
+#ifdef DEBUG
+    print_frame_from_outlined_fn(0);
+#else
     print_frame_from_outlined_fn(1);
+#endif
   }
   print_ids(0);
   print_ids(1);
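Closing note on the ompt-general/ompt-internal/ompt-specific hunks:
ompt_enabled is now zero-initialized explicitly, a second bitfield
(initialized) records that ompt_pre_init() has run, and OmptReturnAddressGuard
keeps capturing return addresses while that bit is still clear, so entries hit
before tool initialization do not lose their codeptr. A standalone C++ model of
the relaxed condition, with hypothetical names:

    #include <cassert>

    struct callbacks_active { // models ompt_callbacks_active_s's two new bits
      unsigned enabled : 1;     // a tool is active
      unsigned initialized : 1; // ompt_pre_init() has run
    };
    static callbacks_active flags{}; // zero-initialized, like ompt_enabled{0}

    static bool should_record(int gtid) { // guard condition from the patch
      return (flags.enabled || !flags.initialized) && gtid >= 0;
    }

    int main() {
      assert(should_record(0));  // before init: record, a tool may yet attach
      flags.initialized = 1;     // init ran and no tool was enabled
      assert(!should_record(0)); // now recording is skipped
    }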