diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -297,8 +297,8 @@ parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); } ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif #if INCLUDE_SSC_MARKS @@ -713,8 +713,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif __kmp_threads[global_tid]->th.th_ident = loc; // TODO: explicit barrier_wait_id: @@ -851,8 +851,8 @@ kmp_team_t *team; ompt_wait_id_t lck; void *codeptr_ra; + OMPT_STORE_RETURN_ADDRESS(gtid); if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); team = __kmp_team_from_gtid(gtid); lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; /* OMPT state update */ @@ -1607,8 +1607,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -1671,8 +1671,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -2069,8 +2069,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif /* This barrier is not a barrier region boundary */ #if USE_ITT_NOTIFY @@ -2083,11 +2083,9 @@ // Consider next barrier a user-visible barrier for barrier region boundaries // Nesting checks are already handled by the single construct checks - + { #if OMPT_SUPPORT - if (ompt_enabled.enabled) { OMPT_STORE_RETURN_ADDRESS(gtid); - } #endif #if USE_ITT_NOTIFY __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. @@ -2099,6 +2097,7 @@ ompt_frame->enter_frame = ompt_data_none; } #endif + } } /* -------------------------------------------------------------------------- */ @@ -3462,8 +3461,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3651,8 +3650,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = @@ -3733,8 +3732,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3759,8 +3758,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3780,8 +3779,8 @@ __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif // TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -573,13 +573,17 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ + { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ + } \ + { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + } \ if (status) { \ KMP_DEBUG_ASSERT(stride == str); \ *p_ub += (str > 0) ? 1 : -1; \ @@ -609,12 +613,17 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ + { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ + TRUE); \ + } \ + { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + } \ if (status) { \ KMP_DEBUG_ASSERT(stride == str); \ *p_ub += (str > 0) ? 1 : -1; \ @@ -1482,12 +1491,13 @@ task, data, num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); + { #if OMPT_SUPPORT OMPT_STORE_RETURN_ADDRESS(gtid); #endif KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - + } task(data); KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -75,11 +75,13 @@ return return_address; } -#define OMPT_STORE_RETURN_ADDRESS(gtid) \ +/*#define OMPT_STORE_RETURN_ADDRESS(gtid) \ if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ - __builtin_return_address(0) + __builtin_return_address(0)*/ +#define OMPT_STORE_RETURN_ADDRESS(gtid) \ + OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)}; #define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) #define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \ ((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ @@ -133,4 +135,23 @@ #define OMPT_REDUCTION_END #endif // ! OMPT_SUPPORT && OMPT_OPTIONAL +class OmptReturnAddressGuard { +private: + bool SetAddress{false}; + int Gtid; + +public: + OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Gtid(Gtid) { + if (ompt_enabled.enabled && Gtid >= 0 && __kmp_threads[Gtid] && + !__kmp_threads[Gtid]->th.ompt_thread_info.return_address) { + SetAddress = true; + __kmp_threads[Gtid]->th.ompt_thread_info.return_address = ReturnAddress; + } + } + ~OmptReturnAddressGuard() { + if (SetAddress) + __kmp_threads[Gtid]->th.ompt_thread_info.return_address = NULL; + } +}; + #endif diff --git a/openmp/runtime/test/ompt/parallel/repeated_calls.c b/openmp/runtime/test/ompt/parallel/repeated_calls.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/ompt/parallel/repeated_calls.c @@ -0,0 +1,102 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#define USE_PRIVATE_TOOL 1 +#include "callback.h" + +__attribute__((noinline)) +int foo(int x) { +#pragma omp parallel num_threads(2) + { +#pragma omp atomic + x++; + } + return x; +} + +__attribute__((noinline)) +int bar(int x) { +#pragma omp parallel num_threads(2) + { +#pragma omp critical + x++; + } + return x; +} + +int main() { + int y; + y = foo(y); + y = bar(y); + y = foo(y); + return 0; + + // CHECK-NOT: {{^}}0: Could not register callback + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // First call to foo + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin + // CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + + // Call to bar + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin + + // Second call to foo + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin + // CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS]] + +} + +static void on_ompt_callback_thread_begin( + ompt_thread_t thread_type, + ompt_data_t *thread_data) { + if (thread_data->ptr) + printf("%s\n", "0: thread_data initially not null"); + thread_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ":" _TOOL_PREFIX + " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", + ompt_get_thread_data()->value, ompt_thread_t_values[thread_type], + thread_type, thread_data->value); +} + +static void on_ompt_callback_parallel_begin( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, + uint32_t requested_team_size, int flag, const void *codeptr_ra) { + if (parallel_data->ptr) + printf("0: parallel_data initially not null\n"); + parallel_data->value = ompt_get_unique_id(); + int invoker = flag & 0xF; + const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams"; + const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams"; + printf("%" PRIu64 ":" _TOOL_PREFIX + " ompt_event_%s_begin: parent_task_id=%" PRIu64 + ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, " + "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32 + ", codeptr_ra=%p, invoker=%d\n", + ompt_get_thread_data()->value, event, encountering_task_data->value, + encountering_task_frame->exit_frame.ptr, + encountering_task_frame->enter_frame.ptr, parallel_data->value, size, + requested_team_size, codeptr_ra, invoker); +} + +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id"); + ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data"); + + register_callback(ompt_callback_thread_begin); + register_callback(ompt_callback_parallel_begin); + printf("0: NULL_POINTER=%p\n", (void *)NULL); + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +}