Index: openmp/runtime/CMakeLists.txt =================================================================== --- openmp/runtime/CMakeLists.txt +++ openmp/runtime/CMakeLists.txt @@ -329,6 +329,20 @@ libomp_error_say("TSAN functionality requested but not available") endif() +# OMPD-support +# Enabled by default only when OMPT support is ON and the target is Linux +set(OMPD_DEFAULT FALSE) +if (LIBOMP_OMPT_SUPPORT AND ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")) + set(OMPD_DEFAULT TRUE) +endif() + +set(LIBOMP_OMPD_SUPPORT ${OMPD_DEFAULT} CACHE BOOL + "OMPD-support?") + +if(LIBOMP_OMPD_SUPPORT AND ((NOT LIBOMP_OMPT_SUPPORT) OR (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux"))) + libomp_error_say("OpenMP Debug Interface(OMPD) requested but not available in this implementation") +endif() + # Error check hwloc support after config-ix has run if(LIBOMP_USE_HWLOC AND (NOT LIBOMP_HAVE_HWLOC)) libomp_error_say("Hwloc requested but not available") @@ -389,6 +403,7 @@ if(${LIBOMP_OMPT_SUPPORT}) libomp_say("Use OMPT-optional -- ${LIBOMP_OMPT_OPTIONAL}") endif() + libomp_say("Use OMPD-support -- ${LIBOMP_OMPD_SUPPORT}") libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}") libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}") libomp_say("Use TSAN-support -- ${LIBOMP_TSAN_SUPPORT}") @@ -401,3 +416,5 @@ # make these variables available for tools: set(LIBOMP_LIBRARY_DIR ${LIBOMP_LIBRARY_DIR} PARENT_SCOPE) set(LIBOMP_INCLUDE_DIR ${LIBOMP_INCLUDE_DIR} PARENT_SCOPE) +# make these variables available for tools/libompd: +set(LIBOMP_SRC_DIR ${LIBOMP_SRC_DIR} PARENT_SCOPE) Index: openmp/runtime/src/CMakeLists.txt =================================================================== --- openmp/runtime/src/CMakeLists.txt +++ openmp/runtime/src/CMakeLists.txt @@ -113,6 +113,7 @@ libomp_append(LIBOMP_CXXFILES kmp_version.cpp) libomp_append(LIBOMP_CXXFILES ompt-general.cpp IF_TRUE LIBOMP_OMPT_SUPPORT) libomp_append(LIBOMP_CXXFILES tsan_annotations.cpp IF_TRUE LIBOMP_TSAN_SUPPORT) +libomp_append(LIBOMP_CXXFILES 
ompd-specific.cpp IF_TRUE LIBOMP_OMPD_SUPPORT) set(LIBOMP_SOURCE_FILES ${LIBOMP_CXXFILES} ${LIBOMP_ASMFILES}) # For Windows, there is a resource file (.rc -> .res) that is also compiled @@ -186,6 +187,7 @@ WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR} ) endif() +set(LIBOMP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) # Create *.inc before compiling any sources # objects depend on : .inc files @@ -204,6 +206,7 @@ libomp_append(LIBOMP_MASM_DEFINITIONS "-D_M_IA32" IF_TRUE IA32) libomp_append(LIBOMP_MASM_DEFINITIONS "-D_M_AMD64" IF_TRUE INTEL64) libomp_append(LIBOMP_MASM_DEFINITIONS "-DOMPT_SUPPORT" IF_TRUE_1_0 LIBOMP_OMPT_SUPPORT) + libomp_append(LIBOMP_MASM_DEFINITIONS "-DOMPD_SUPPORT" IF_TRUE_1_0 LIBOMP_OMPD_SUPPORT) libomp_list_to_string("${LIBOMP_MASM_DEFINITIONS}" LIBOMP_MASM_DEFINITIONS) set_property(SOURCE z_Windows_NT-586_asm.asm APPEND_STRING PROPERTY COMPILE_FLAGS " ${LIBOMP_MASM_DEFINITIONS}") set_source_files_properties(thirdparty/ittnotify/ittnotify_static.cpp PROPERTIES COMPILE_DEFINITIONS "UNICODE") Index: openmp/runtime/src/include/omp-tools.h.var =================================================================== --- openmp/runtime/src/include/omp-tools.h.var +++ openmp/runtime/src/include/omp-tools.h.var @@ -483,6 +483,8 @@ ompd_rc_device_read_error = 8, ompd_rc_device_write_error = 9, ompd_rc_nomem = 10, + ompd_rc_incomplete = 11, + ompd_rc_callback_error = 12 } ompd_rc_t; typedef void (*ompt_interface_fn_t) (void); @@ -1133,6 +1135,198 @@ uint8_t sizeof_pointer; } ompd_device_type_sizes_t; +void ompd_dll_locations_valid(void); + +typedef ompd_rc_t (*ompd_callback_memory_alloc_fn_t)(ompd_size_t nbytes, + void **ptr); + +typedef ompd_rc_t (*ompd_callback_memory_free_fn_t)(void *ptr); + +typedef ompd_rc_t (*ompd_callback_get_thread_context_for_thread_id_fn_t)( + ompd_address_space_context_t *address_space_context, ompd_thread_id_t kind, + ompd_size_t sizeof_thread_id, const void *thread_id, + ompd_thread_context_t **thread_context); + +typedef 
ompd_rc_t (*ompd_callback_sizeof_fn_t)( + ompd_address_space_context_t *address_space_context, + ompd_device_type_sizes_t *sizes); + +typedef ompd_rc_t (*ompd_callback_symbol_addr_fn_t)( + ompd_address_space_context_t *address_space_context, + ompd_thread_context_t *thread_context, const char *symbol_name, + ompd_address_t *symbol_addr, const char *file_name); + +typedef ompd_rc_t (*ompd_callback_memory_read_fn_t)( + ompd_address_space_context_t *address_space_context, + ompd_thread_context_t *thread_context, const ompd_address_t *addr, + ompd_size_t nbytes, void *buffer); + +typedef ompd_rc_t (*ompd_callback_memory_write_fn_t)( + ompd_address_space_context_t *address_space_context, + ompd_thread_context_t *thread_context, const ompd_address_t *addr, + ompd_size_t nbytes, const void *buffer); + +typedef ompd_rc_t (*ompd_callback_device_host_fn_t)( + ompd_address_space_context_t *address_space_context, const void *input, + ompd_size_t unit_size, ompd_size_t count, void *output); + +typedef ompd_rc_t (*ompd_callback_print_string_fn_t)(const char *string, + int category); + +typedef struct ompd_callbacks_t { + ompd_callback_memory_alloc_fn_t alloc_memory; + ompd_callback_memory_free_fn_t free_memory; + ompd_callback_print_string_fn_t print_string; + ompd_callback_sizeof_fn_t sizeof_type; + ompd_callback_symbol_addr_fn_t symbol_addr_lookup; + ompd_callback_memory_read_fn_t read_memory; + ompd_callback_memory_write_fn_t write_memory; + ompd_callback_memory_read_fn_t read_string; + ompd_callback_device_host_fn_t device_to_host; + ompd_callback_device_host_fn_t host_to_device; + ompd_callback_get_thread_context_for_thread_id_fn_t + get_thread_context_for_thread_id; +} ompd_callbacks_t; + +void ompd_bp_parallel_begin(void); + +void ompd_bp_parallel_end(void); + +void ompd_bp_task_begin(void); + +void ompd_bp_task_end(void); + +void ompd_bp_thread_begin(void); + +void ompd_bp_thread_end(void); + +void ompd_bp_device_begin(void); + +void ompd_bp_device_end(void); + 
+ompd_rc_t ompd_initialize(ompd_word_t api_version, + const ompd_callbacks_t *callbacks); + +ompd_rc_t ompd_get_api_version(ompd_word_t *version); + +ompd_rc_t ompd_get_version_string(const char **string); + +ompd_rc_t ompd_finalize(void); + +ompd_rc_t ompd_process_initialize(ompd_address_space_context_t *context, + ompd_address_space_handle_t **handle); + +ompd_rc_t ompd_device_initialize(ompd_address_space_handle_t *process_handle, + ompd_address_space_context_t *device_context, + ompd_device_t kind, ompd_size_t sizeof_id, + void *id, + ompd_address_space_handle_t **device_handle); + +ompd_rc_t ompd_rel_address_space_handle(ompd_address_space_handle_t *handle); + +ompd_rc_t ompd_get_omp_version(ompd_address_space_handle_t *address_space, + ompd_word_t *omp_version); + +ompd_rc_t +ompd_get_omp_version_string(ompd_address_space_handle_t *address_space, + const char **string); + +ompd_rc_t ompd_get_thread_in_parallel(ompd_parallel_handle_t *parallel_handle, + int thread_num, + ompd_thread_handle_t **thread_handle); + +ompd_rc_t ompd_get_thread_handle(ompd_address_space_handle_t *handle, + ompd_thread_id_t kind, + ompd_size_t sizeof_thread_id, + const void *thread_id, + ompd_thread_handle_t **thread_handle); + +ompd_rc_t ompd_rel_thread_handle(ompd_thread_handle_t *thread_handle); + +ompd_rc_t ompd_thread_handle_compare(ompd_thread_handle_t *thread_handle_1, + ompd_thread_handle_t *thread_handle_2, + int *cmp_value); + +ompd_rc_t ompd_get_thread_id(ompd_thread_handle_t *thread_handle, + ompd_thread_id_t kind, + ompd_size_t sizeof_thread_id, void *thread_id); + +ompd_rc_t +ompd_get_curr_parallel_handle(ompd_thread_handle_t *thread_handle, + ompd_parallel_handle_t **parallel_handle); + +ompd_rc_t ompd_get_enclosing_parallel_handle( + ompd_parallel_handle_t *parallel_handle, + ompd_parallel_handle_t **enclosing_parallel_handle); + +ompd_rc_t +ompd_get_task_parallel_handle(ompd_task_handle_t *task_handle, + ompd_parallel_handle_t **task_parallel_handle); + +ompd_rc_t 
ompd_rel_parallel_handle(ompd_parallel_handle_t *parallel_handle); + +ompd_rc_t +ompd_parallel_handle_compare(ompd_parallel_handle_t *parallel_handle_1, + ompd_parallel_handle_t *parallel_handle_2, + int *cmp_value); + +ompd_rc_t ompd_get_curr_task_handle(ompd_thread_handle_t *thread_handle, + ompd_task_handle_t **task_handle); + +ompd_rc_t +ompd_get_generating_task_handle(ompd_task_handle_t *task_handle, + ompd_task_handle_t **generating_task_handle); + +ompd_rc_t +ompd_get_scheduling_task_handle(ompd_task_handle_t *task_handle, + ompd_task_handle_t **scheduling_task_handle); + +ompd_rc_t ompd_get_task_in_parallel(ompd_parallel_handle_t *parallel_handle, + int thread_num, + ompd_task_handle_t **task_handle); + +ompd_rc_t ompd_rel_task_handle(ompd_task_handle_t *task_handle); + +ompd_rc_t ompd_task_handle_compare(ompd_task_handle_t *task_handle_1, + ompd_task_handle_t *task_handle_2, + int *cmp_value); + +ompd_rc_t ompd_get_task_function(ompd_task_handle_t *task_handle, + ompd_address_t *entry_point); + +ompd_rc_t ompd_get_task_frame(ompd_task_handle_t *task_handle, + ompd_frame_info_t *exit_frame, + ompd_frame_info_t *enter_frame); + +ompd_rc_t +ompd_enumerate_states(ompd_address_space_handle_t *address_space_handle, + ompd_word_t current_state, ompd_word_t *next_state, + const char **next_state_name, ompd_word_t *more_enums); + +ompd_rc_t ompd_get_state(ompd_thread_handle_t *thread_handle, + ompd_word_t *state, ompt_wait_id_t *wait_id); + +ompd_rc_t +ompd_get_display_control_vars(ompd_address_space_handle_t *address_space_handle, + const char *const **control_vars); + +ompd_rc_t ompd_rel_display_control_vars(const char *const **control_vars); + +ompd_rc_t ompd_enumerate_icvs(ompd_address_space_handle_t *handle, + ompd_icv_id_t current, ompd_icv_id_t *next_id, + const char **next_icv_name, + ompd_scope_t *next_scope, int *more); + +ompd_rc_t ompd_get_icv_from_scope(void *handle, ompd_scope_t scope, + ompd_icv_id_t icv_id, ompd_word_t *icv_value); + +ompd_rc_t 
ompd_get_icv_string_from_scope(void *handle, ompd_scope_t scope, + ompd_icv_id_t icv_id, + const char **icv_string); + +ompd_rc_t ompd_get_tool_data(void *handle, ompd_scope_t scope, + ompd_word_t *value, ompd_address_t *ptr); + typedef struct ompt_record_ompt_t { ompt_callbacks_t type; ompt_device_time_t time; Index: openmp/runtime/src/kmp.h =================================================================== --- openmp/runtime/src/kmp.h +++ openmp/runtime/src/kmp.h @@ -138,6 +138,10 @@ #include "ompt-internal.h" #endif +#if OMPD_SUPPORT +#include "ompd-specific.h" +#endif + #ifndef UNLIKELY #define UNLIKELY(x) (x) #endif @@ -863,6 +867,10 @@ extern int __kmp_display_affinity; extern char *__kmp_affinity_format; static const size_t KMP_AFFINITY_FORMAT_SIZE = 512; +#if OMPT_SUPPORT +extern int __kmp_tool; +extern char *__kmp_tool_libraries; +#endif // OMPT_SUPPORT #if KMP_AFFINITY_SUPPORTED #define KMP_PLACE_ALL (-1) Index: openmp/runtime/src/kmp_config.h.cmake =================================================================== --- openmp/runtime/src/kmp_config.h.cmake +++ openmp/runtime/src/kmp_config.h.cmake @@ -44,6 +44,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT +#cmakedefine01 LIBOMP_OMPD_SUPPORT +#define OMPD_SUPPORT LIBOMP_OMPD_SUPPORT #cmakedefine01 LIBOMP_PROFILING_SUPPORT #define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL Index: openmp/runtime/src/kmp_csupport.cpp =================================================================== --- openmp/runtime/src/kmp_csupport.cpp +++ openmp/runtime/src/kmp_csupport.cpp @@ -604,6 +604,11 @@ } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_parallel_end(); +#endif + this_thr->th.th_team = serial_team->t.t_parent; this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; Index: openmp/runtime/src/kmp_gsupport.cpp 
=================================================================== --- openmp/runtime/src/kmp_gsupport.cpp +++ openmp/runtime/src/kmp_gsupport.cpp @@ -498,6 +498,10 @@ frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_parallel_begin(); +#endif } void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) { @@ -528,6 +532,10 @@ fork_context_gnu #endif ); +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_parallel_end(); +#endif } // Loop worksharing constructs Index: openmp/runtime/src/kmp_runtime.cpp =================================================================== --- openmp/runtime/src/kmp_runtime.cpp +++ openmp/runtime/src/kmp_runtime.cpp @@ -31,6 +31,9 @@ #if OMPT_SUPPORT #include "ompt-specific.h" #endif +#if OMPD_SUPPORT +#include "ompd-specific.h" +#endif #if OMP_PROFILING_SUPPORT #include "llvm/Support/TimeProfiler.h" @@ -1337,6 +1340,10 @@ this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); +#if OMPD_SUPPORT + OMPT_CUR_TASK_INFO(this_thr)->scheduling_parent = + this_thr->th.th_current_task->td_parent; +#endif } #endif } @@ -1470,6 +1477,10 @@ return TRUE; } +#if OMPD_SUPPORT + parent_team->t.t_pkfn = microtask; +#endif + #if OMPT_SUPPORT void *dummy; void **exit_frame_p; @@ -1494,6 +1505,10 @@ implicit_task_data, 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); } +#if OMPD_SUPPORT + OMPT_CUR_TASK_INFO(master_th)->scheduling_parent = + master_th->th.th_current_task->td_parent; +#endif /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; @@ -1694,6 +1709,10 @@ __kmpc_serialized_parallel(loc, gtid); +#if OMPD_SUPPORT + master_th->th.th_serial_team->t.t_pkfn = microtask; +#endif + if (call_context == fork_context_intel) { /* TODO this sucks, use the compiler itself to pass args! 
:) */ master_th->th.th_serial_team->t.t_ident = loc; @@ -1837,6 +1856,10 @@ OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid); } +#if OMPD_SUPPORT + OMPT_CUR_TASK_INFO(master_th)->scheduling_parent = + master_th->th.th_current_task->td_parent; +#endif /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; @@ -1865,6 +1888,10 @@ OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); } +#if OMPD_SUPPORT + OMPT_CUR_TASK_INFO(master_th)->scheduling_parent = + master_th->th.th_current_task->td_parent; +#endif ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); @@ -2020,6 +2047,10 @@ // Update the floating point rounding in the team if required. propagateFPControl(team); +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_parallel_begin(); +#endif if (__kmp_tasking_mode != tskm_immediate_exec) { // Set primary thread's task team to team's task team. Unless this is hot @@ -2212,7 +2243,6 @@ KMP_MB(); /* Flush all pending memory write invalidates. 
*/ KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); - #if OMPT_SUPPORT if (ompt_enabled.enabled) { master_th->th.ompt_thread_info.state = ompt_state_overhead; @@ -2488,6 +2518,10 @@ #endif // KMP_AFFINITY_SUPPORTED master_th->th.th_def_allocator = team->t.t_def_allocator; +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_parallel_end(); +#endif updateHWFPControl(team); if (root->r.r_active != master_active) @@ -3841,6 +3875,10 @@ ompt_set_thread_state(root_thread, ompt_state_work_serial); } #endif +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_thread_begin(); +#endif KMP_MB(); __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); @@ -3924,6 +3962,11 @@ __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread); #endif /* KMP_OS_WINDOWS */ +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_thread_end(); +#endif + #if OMPT_SUPPORT ompt_data_t *task_data; ompt_data_t *parallel_data; @@ -5750,6 +5793,11 @@ this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak? 
} +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_thread_begin(); +#endif + #if OMPT_SUPPORT ompt_data_t *thread_data = nullptr; if (ompt_enabled.enabled) { @@ -5827,6 +5875,11 @@ } TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_thread_end(); +#endif + #if OMPT_SUPPORT if (ompt_enabled.ompt_callback_thread_end) { ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data); @@ -6688,6 +6741,10 @@ #if OMPT_SUPPORT ompt_pre_init(); #endif +#if OMPD_SUPPORT + __kmp_env_dump(); + ompd_init(); +#endif __kmp_validate_locks(); @@ -7831,6 +7888,13 @@ #else __kmp_cleanup_user_locks(); #endif +#if OMPD_SUPPORT + if (ompd_env_block) { /* allocated by __kmp_env_dump() even when OMP_DEBUG is off */ + __kmp_free(ompd_env_block); + ompd_env_block = NULL; + ompd_env_block_size = 0; + } +#endif #if KMP_AFFINITY_SUPPORTED KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file)); Index: openmp/runtime/src/kmp_settings.h =================================================================== --- openmp/runtime/src/kmp_settings.h +++ openmp/runtime/src/kmp_settings.h @@ -18,6 +18,9 @@ void __kmp_env_print(); void __kmp_env_print_2(); void __kmp_display_env_impl(int display_env, int display_env_verbose); +#if OMPD_SUPPORT +void __kmp_env_dump(); +#endif int __kmp_initial_threads_capacity(int req_nproc); void __kmp_init_dflt_team_nth(); Index: openmp/runtime/src/kmp_settings.cpp =================================================================== --- openmp/runtime/src/kmp_settings.cpp +++ openmp/runtime/src/kmp_settings.cpp @@ -25,6 +25,9 @@ #include "kmp_str.h" #include "kmp_wrapper_getpid.h" #include <ctype.h> // toupper() +#if OMPD_SUPPORT +#include "ompd-specific.h" +#endif static int __kmp_env_toPrint(char const *name, int flag); @@ -5030,7 +5033,7 @@ } // __kmp_stg_print_omp_cancellation #if OMPT_SUPPORT -static int __kmp_tool = 1; +int __kmp_tool = 1; static void __kmp_stg_parse_omp_tool(char const *name, char const *value, void *data) { @@ -5047,7 +5050,7 @@ } } // 
__kmp_stg_print_omp_tool -static char *__kmp_tool_libraries = NULL; +char *__kmp_tool_libraries = NULL; static void __kmp_stg_parse_omp_tool_libraries(char const *name, char const *value, void *data) { @@ -5068,7 +5071,7 @@ } } // __kmp_stg_print_omp_tool_libraries -static char *__kmp_tool_verbose_init = NULL; +char *__kmp_tool_verbose_init = NULL; static void __kmp_stg_parse_omp_tool_verbose_init(char const *name, char const *value, @@ -6174,4 +6177,47 @@ __kmp_printf("\n"); } +#if OMPD_SUPPORT +// Dump the printable runtime settings into ompd_env_block for OMPD +void __kmp_env_dump() { + + kmp_env_blk_t block; + kmp_str_buf_t buffer, env, notdefined; + + __kmp_stg_init(); + __kmp_str_buf_init(&buffer); + __kmp_str_buf_init(&env); + __kmp_str_buf_init(&notdefined); + + __kmp_env_blk_init(&block, NULL); + __kmp_env_blk_sort(&block); + + __kmp_str_buf_print(&notdefined, ": %s", KMP_I18N_STR(NotDefined)); + + for (int i = 0; i < __kmp_stg_count; ++i) { + if (__kmp_stg_table[i].print == NULL) + continue; + __kmp_str_buf_clear(&env); + __kmp_stg_table[i].print(&env, __kmp_stg_table[i].name, + __kmp_stg_table[i].data); + if (env.used < 4) // valid definition must have indents (3) and a new line + continue; + if (strstr(env.str, notdefined.str)) + // normalize the string + __kmp_str_buf_print(&buffer, "%s=undefined\n", __kmp_stg_table[i].name); + else + __kmp_str_buf_cat(&buffer, env.str + 3, env.used - 3); + } + + ompd_env_block = (char *)__kmp_allocate(buffer.used + 1); + KMP_MEMCPY(ompd_env_block, buffer.str, buffer.used + 1); + ompd_env_block_size = (ompd_size_t)KMP_STRLEN(ompd_env_block); + + __kmp_env_blk_free(&block); + __kmp_str_buf_free(&buffer); + __kmp_str_buf_free(&env); + __kmp_str_buf_free(&notdefined); +} +#endif // OMPD_SUPPORT + // end of file Index: openmp/runtime/src/kmp_tasking.cpp =================================================================== --- openmp/runtime/src/kmp_tasking.cpp +++ openmp/runtime/src/kmp_tasking.cpp @@ -1603,6 +1603,11 @@ __ompt_task_start(task, current_task, 
gtid); #endif +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_task_begin(); +#endif + #if USE_ITT_BUILD && USE_ITT_NOTIFY kmp_uint64 cur_time; kmp_int32 kmp_itt_count_task = @@ -1639,6 +1644,11 @@ #endif } +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_task_end(); +#endif + // Proxy tasks are not handled by the runtime if (taskdata->td_flags.proxy != TASK_PROXY) { ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); Index: openmp/runtime/src/kmp_wait_release.h =================================================================== --- openmp/runtime/src/kmp_wait_release.h +++ openmp/runtime/src/kmp_wait_release.h @@ -1026,9 +1026,18 @@ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), kmp_int32 is_constrained) { +#if OMPD_SUPPORT + int ret = __kmp_execute_tasks_oncore( + this_thr, gtid, this, final_spin, + thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); + if (ompd_state & OMPD_ENABLE_BP) + ompd_bp_task_end(); + return ret; +#else return __kmp_execute_tasks_oncore( this_thr, gtid, this, final_spin, thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); +#endif } kmp_uint8 *get_stolen() { return NULL; } enum barrier_type get_bt() { return bt; } Index: openmp/runtime/src/ompd-specific.h =================================================================== --- /dev/null +++ openmp/runtime/src/ompd-specific.h @@ -0,0 +1,159 @@ +/* + * ompd-specific.h -- OpenMP debug support + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "kmp.h" +#include "omp-tools.h" +#include + +#ifndef __OMPD_SPECIFIC_H__ +#define __OMPD_SPECIFIC_H__ + +#if OMPD_SUPPORT + +void ompd_init(); +extern int ompd_rtl_version; + +#ifdef __cplusplus +extern "C" { +#endif +extern char *ompd_env_block; +extern ompd_size_t ompd_env_block_size; +extern char *__kmp_tool_verbose_init; +#ifdef __cplusplus +} /* extern "C" */ +#endif + +extern uint64_t ompd_state; +#define OMPD_ENABLE_BP 0x1 + +#define OMPD_FOREACH_ACCESS(OMPD_ACCESS) \ + OMPD_ACCESS(kmp_base_info_t, th_current_task) \ + OMPD_ACCESS(kmp_base_info_t, th_team) \ + OMPD_ACCESS(kmp_base_info_t, th_info) \ + OMPD_ACCESS(kmp_base_info_t, ompt_thread_info) \ + \ + OMPD_ACCESS(kmp_base_root_t, r_in_parallel) \ + \ + OMPD_ACCESS(kmp_base_team_t, ompt_team_info) \ + OMPD_ACCESS(kmp_base_team_t, ompt_serialized_team_info) \ + OMPD_ACCESS(kmp_base_team_t, t_active_level) \ + OMPD_ACCESS(kmp_base_team_t, t_implicit_task_taskdata) \ + OMPD_ACCESS(kmp_base_team_t, t_master_tid) \ + OMPD_ACCESS(kmp_base_team_t, t_nproc) \ + OMPD_ACCESS(kmp_base_team_t, t_level) \ + OMPD_ACCESS(kmp_base_team_t, t_parent) \ + OMPD_ACCESS(kmp_base_team_t, t_pkfn) \ + OMPD_ACCESS(kmp_base_team_t, t_threads) \ + \ + OMPD_ACCESS(kmp_desc_t, ds) \ + \ + OMPD_ACCESS(kmp_desc_base_t, ds_thread) \ + OMPD_ACCESS(kmp_desc_base_t, ds_tid) \ + \ + OMPD_ACCESS(kmp_info_t, th) \ + \ + OMPD_ACCESS(kmp_r_sched_t, r_sched_type) \ + OMPD_ACCESS(kmp_r_sched_t, chunk) \ + \ + OMPD_ACCESS(kmp_root_t, r) \ + \ + OMPD_ACCESS(kmp_internal_control_t, dynamic) \ + OMPD_ACCESS(kmp_internal_control_t, max_active_levels) \ + OMPD_ACCESS(kmp_internal_control_t, nproc) \ + OMPD_ACCESS(kmp_internal_control_t, proc_bind) \ + OMPD_ACCESS(kmp_internal_control_t, sched) \ + OMPD_ACCESS(kmp_internal_control_t, default_device) \ + OMPD_ACCESS(kmp_internal_control_t, 
thread_limit) \ + \ + OMPD_ACCESS(kmp_taskdata_t, ompt_task_info) \ + OMPD_ACCESS(kmp_taskdata_t, td_flags) \ + OMPD_ACCESS(kmp_taskdata_t, td_icvs) \ + OMPD_ACCESS(kmp_taskdata_t, td_parent) \ + OMPD_ACCESS(kmp_taskdata_t, td_team) \ + \ + OMPD_ACCESS(kmp_task_t, routine) \ + \ + OMPD_ACCESS(kmp_team_p, t) \ + \ + OMPD_ACCESS(kmp_nested_nthreads_t, used) \ + OMPD_ACCESS(kmp_nested_nthreads_t, nth) \ + \ + OMPD_ACCESS(kmp_nested_proc_bind_t, used) \ + OMPD_ACCESS(kmp_nested_proc_bind_t, bind_types) \ + \ + OMPD_ACCESS(ompt_task_info_t, frame) \ + OMPD_ACCESS(ompt_task_info_t, scheduling_parent) \ + OMPD_ACCESS(ompt_task_info_t, task_data) \ + \ + OMPD_ACCESS(ompt_team_info_t, parallel_data) \ + \ + OMPD_ACCESS(ompt_thread_info_t, state) \ + OMPD_ACCESS(ompt_thread_info_t, wait_id) \ + OMPD_ACCESS(ompt_thread_info_t, thread_data) \ + \ + OMPD_ACCESS(ompt_data_t, value) \ + OMPD_ACCESS(ompt_data_t, ptr) \ + \ + OMPD_ACCESS(ompt_frame_t, exit_frame) \ + OMPD_ACCESS(ompt_frame_t, enter_frame) \ + \ + OMPD_ACCESS(ompt_lw_taskteam_t, parent) \ + OMPD_ACCESS(ompt_lw_taskteam_t, ompt_team_info) \ + OMPD_ACCESS(ompt_lw_taskteam_t, ompt_task_info) + +#define OMPD_FOREACH_BITFIELD(OMPD_BITFIELD) \ + OMPD_BITFIELD(kmp_tasking_flags_t, final) \ + OMPD_BITFIELD(kmp_tasking_flags_t, tiedness) \ + OMPD_BITFIELD(kmp_tasking_flags_t, tasktype) \ + OMPD_BITFIELD(kmp_tasking_flags_t, task_serial) \ + OMPD_BITFIELD(kmp_tasking_flags_t, tasking_ser) \ + OMPD_BITFIELD(kmp_tasking_flags_t, team_serial) \ + OMPD_BITFIELD(kmp_tasking_flags_t, started) \ + OMPD_BITFIELD(kmp_tasking_flags_t, executing) \ + OMPD_BITFIELD(kmp_tasking_flags_t, complete) \ + OMPD_BITFIELD(kmp_tasking_flags_t, freed) \ + OMPD_BITFIELD(kmp_tasking_flags_t, native) + +// TODO: (mr) this is a hack to cast cuda contexts to 64 bit values +typedef uint64_t ompd_cuda_context_ptr_t; + +#define OMPD_FOREACH_SIZEOF(OMPD_SIZEOF) \ + OMPD_SIZEOF(kmp_info_t) \ + OMPD_SIZEOF(kmp_taskdata_t) \ + OMPD_SIZEOF(kmp_task_t) \ + 
OMPD_SIZEOF(kmp_tasking_flags_t) \ + OMPD_SIZEOF(kmp_thread_t) \ + OMPD_SIZEOF(ompt_data_t) \ + OMPD_SIZEOF(ompt_id_t) \ + OMPD_SIZEOF(__kmp_avail_proc) \ + OMPD_SIZEOF(__kmp_max_nth) \ + OMPD_SIZEOF(__kmp_stksize) \ + OMPD_SIZEOF(__kmp_omp_cancellation) \ + OMPD_SIZEOF(__kmp_max_task_priority) \ + OMPD_SIZEOF(__kmp_display_affinity) \ + OMPD_SIZEOF(__kmp_affinity_format) \ + OMPD_SIZEOF(__kmp_tool_libraries) \ + OMPD_SIZEOF(__kmp_tool_verbose_init) \ + OMPD_SIZEOF(__kmp_tool) \ + OMPD_SIZEOF(ompd_state) \ + OMPD_SIZEOF(kmp_nested_nthreads_t) \ + OMPD_SIZEOF(__kmp_nested_nth) \ + OMPD_SIZEOF(kmp_nested_proc_bind_t) \ + OMPD_SIZEOF(__kmp_nested_proc_bind) \ + OMPD_SIZEOF(int) \ + OMPD_SIZEOF(char) \ + OMPD_SIZEOF(__kmp_gtid) \ + OMPD_SIZEOF(__kmp_nth) \ + OMPD_SIZEOF(ompd_cuda_context_ptr_t) + +#endif /* OMPD_SUPPORT */ +#endif Index: openmp/runtime/src/ompd-specific.cpp =================================================================== --- /dev/null +++ openmp/runtime/src/ompd-specific.cpp @@ -0,0 +1,157 @@ +/* + * ompd-specific.cpp -- OpenMP debug support + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ompd-specific.h" + +#if OMPD_SUPPORT + +/** + * Declaration of symbols to hold struct size and member offset information + */ + +#define ompd_declare_access(t, m) uint64_t ompd_access__##t##__##m; +OMPD_FOREACH_ACCESS(ompd_declare_access) +#undef ompd_declare_access + +#define ompd_declare_sizeof_member(t, m) uint64_t ompd_sizeof__##t##__##m; +OMPD_FOREACH_ACCESS(ompd_declare_sizeof_member) +#undef ompd_declare_sizeof_member + +#define ompd_declare_bitfield(t, m) uint64_t ompd_bitfield__##t##__##m; +OMPD_FOREACH_BITFIELD(ompd_declare_bitfield) +#undef ompd_declare_bitfield + +#define ompd_declare_sizeof(t) uint64_t ompd_sizeof__##t; +OMPD_FOREACH_SIZEOF(ompd_declare_sizeof) +#undef ompd_declare_sizeof + +volatile const char **ompd_dll_locations = NULL; +uint64_t ompd_state = 0; + +int ompd_rtl_version = 7; + +char *ompd_env_block = NULL; +ompd_size_t ompd_env_block_size = 0; + +void ompd_init() { + + static int ompd_initialized = 0; + + if (ompd_initialized) + return; + + /** + * Calculate member offsets for structs and unions + */ + +#define ompd_init_access(t, m) \ + ompd_access__##t##__##m = (uint64_t) & (((t *)0)->m); + OMPD_FOREACH_ACCESS(ompd_init_access) +#undef ompd_init_access + + /** + * Create bit mask for bitfield access + */ + +#define ompd_init_bitfield(t, m) \ + ompd_bitfield__##t##__##m = 0; \ + ((t *)(&ompd_bitfield__##t##__##m))->m = 1; + OMPD_FOREACH_BITFIELD(ompd_init_bitfield) +#undef ompd_init_bitfield + + /** + * Calculate type size information + */ + +#define ompd_init_sizeof_member(t, m) \ + ompd_sizeof__##t##__##m = sizeof(((t *)0)->m); + OMPD_FOREACH_ACCESS(ompd_init_sizeof_member) +#undef ompd_init_sizeof_member + +#define ompd_init_sizeof(t) ompd_sizeof__##t = sizeof(t); + OMPD_FOREACH_SIZEOF(ompd_init_sizeof) +#undef ompd_init_sizeof + + const char *libname = ""; + +#if KMP_OS_UNIX + 
// Locate this runtime library with dladdr and build "DIR/libompd.so". Only + // the directory prefix is copied, so a long runtime file name cannot overflow. + Dl_info dl_info; + int ret = dladdr((void *)ompd_init, &dl_info); + if (!ret) { + fprintf(stderr, "OMPD: dladdr() could not locate the OpenMP runtime library\n"); + } + int lib_path_length; + if (ret && strrchr(dl_info.dli_fname, '/')) { + lib_path_length = strrchr(dl_info.dli_fname, '/') - dl_info.dli_fname; + char *path = + (char *)malloc(lib_path_length + 12 /*for '/libompd.so' and '\0'*/); + memcpy(path, dl_info.dli_fname, lib_path_length); + memcpy(path + lib_path_length, "/libompd.so", 12); + libname = path; + } +#endif + + const char *ompd_env_var = getenv("OMP_DEBUG"); + if (ompd_env_var && !strcmp(ompd_env_var, "enabled")) { + fprintf(stderr, "OMP_OMPD active\n"); + ompt_enabled.enabled = 1; + ompd_state |= OMPD_ENABLE_BP; + } + + ompd_initialized = 1; + ompd_dll_locations = (volatile const char **)malloc(3 * sizeof(const char *)); + ompd_dll_locations[0] = "libompd.so"; + ompd_dll_locations[1] = libname; + ompd_dll_locations[2] = NULL; + ompd_dll_locations_valid(); +} + +void __attribute__((noinline)) ompd_dll_locations_valid(void) { + /* naive way of implementing hard to opt-out empty function + we might want to use a separate object file? */ + asm(""); +} + +void ompd_bp_parallel_begin(void) { + /* naive way of implementing hard to opt-out empty function + we might want to use a separate object file? */ + asm(""); +} +void ompd_bp_parallel_end(void) { + /* naive way of implementing hard to opt-out empty function + we might want to use a separate object file? */ + asm(""); +} +void ompd_bp_task_begin(void) { + /* naive way of implementing hard to opt-out empty function + we might want to use a separate object file? */ + asm(""); +} +void ompd_bp_task_end(void) { + /* naive way of implementing hard to opt-out empty function + we might want to use a separate object file? 
*/ + asm(""); +} +void ompd_bp_thread_begin(void) { + /* naive way of implementing hard to opt-out empty function + we might want to use a separate object file? */ + asm(""); +} +void ompd_bp_thread_end(void) { + /* naive way of implementing hard to opt-out empty function + we might want to use a separate object file? */ + asm(""); +} + +#endif /* OMPD_SUPPORT */ Index: openmp/runtime/src/ompt-general.cpp =================================================================== --- openmp/runtime/src/ompt-general.cpp +++ openmp/runtime/src/ompt-general.cpp @@ -501,7 +501,11 @@ } void ompt_fini() { - if (ompt_enabled.enabled) { + if (ompt_enabled.enabled +#if OMPD_SUPPORT + && ompt_start_tool_result && ompt_start_tool_result->finalize +#endif + ) { ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); } Index: openmp/runtime/src/ompt-specific.cpp =================================================================== --- openmp/runtime/src/ompt-specific.cpp +++ openmp/runtime/src/ompt-specific.cpp @@ -292,10 +292,20 @@ thr->th.th_team->t.ompt_serialized_team_info; link_lwt->parent = my_parent; thr->th.th_team->t.ompt_serialized_team_info = link_lwt; +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) { + ompd_bp_parallel_begin(); + } +#endif } else { // this is the first serialized team, so we just store the values in the // team and drop the taskteam-object *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info; +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) { + ompd_bp_parallel_begin(); + } +#endif *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info; } } @@ -303,6 +313,11 @@ void __ompt_lw_taskteam_unlink(kmp_info_t *thr) { ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; if (lwtask) { +#if OMPD_SUPPORT + if (ompd_state & OMPD_ENABLE_BP) { + ompd_bp_parallel_end(); + } +#endif thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; ompt_team_info_t tmp_team = lwtask->ompt_team_info;