Index: CREDITS.txt =================================================================== --- CREDITS.txt +++ CREDITS.txt @@ -26,6 +26,10 @@ W: http://openmprtl.org D: Created the runtime. +N: John Mellor-Crummey and other members of the OpenMP Tools Working Group +E: johnmc@rice.edu +D: OpenMP Tools Interface (OMPT) + N: Matthias Muller D: Contributor to testsuite from OpenUH Index: runtime/CMakeLists.txt =================================================================== --- runtime/CMakeLists.txt +++ runtime/CMakeLists.txt @@ -96,6 +96,11 @@ # particular openmp regions are recorded. set(stats false CACHE BOOL "Stats-Gathering functionality?" ) +# OMPT-support +set(ompt_support false CACHE BOOL "OMPT-support?" ) +set(ompt_blame true CACHE BOOL "OMPT-blame?" ) +set(ompt_trace true CACHE BOOL "OMPT-trace?" ) + # User specified flags. These are appended to the predetermined flags found in CommonFlags.cmake and ${CMAKE_C_COMPILER_ID}/*Flags.cmake (e.g., GNU/CFlags.cmake) set(USER_C_FLAGS "" CACHE STRING "Appended user specified C compiler flags." ) set(USER_CXX_FLAGS "" CACHE STRING "Appended user specified C++ compiler flags." ) @@ -205,6 +210,20 @@ set(STATS_GATHERING TRUE) endif() +# OMPT-support +set(OMPT_SUPPORT FALSE) +if("${ompt_support}") # string "on" or "ON" is seen as boolean TRUE + set(OMPT_SUPPORT TRUE) +endif() +set(OMPT_BLAME TRUE) +if(NOT "${ompt_blame}") # string "on" or "ON" is seen as boolean TRUE + set(OMPT_BLAME FALSE) +endif() +set(OMPT_TRACE TRUE) +if(NOT "${ompt_trace}") # string "on" or "ON" is seen as boolean TRUE + set(OMPT_TRACE FALSE) +endif() + # Include itt notify interface? Right now, always. 
set(USE_ITT_NOTIFY TRUE) @@ -285,6 +304,15 @@ if(${STATS_GATHERING}) set(suffix "${suffix}.s1") endif() +if(${OMPT_SUPPORT}) + set(suffix "${suffix}.ompt") + if(NOT ${OMPT_BLAME}) + set(suffix "${suffix}.no-ompt-blame") + endif() + if(NOT ${OMPT_TRACE}) + set(suffix "${suffix}.no-ompt-trace") + endif() +endif() #################################### # Setting file extensions / suffixes @@ -357,6 +385,11 @@ set(export_mod_files "omp_lib.mod" "omp_lib_kinds.mod") set(export_cmn_files1 "omp.h" "omp_lib.h" "omp_lib.f" "omp_lib.f90") set(export_cmn_files2 "iomp.h") + +if(${OMPT_SUPPORT}) + set(export_cmn_files1 ${export_cmn_files1} "ompt.h") +endif() + add_prefix("${export_lib_dir}/" export_lib_files) add_prefix("${export_ptf_dir}/include_compat/" export_inc_files) add_prefix("${export_ptf_dir}/include/" export_mod_files) @@ -563,6 +596,7 @@ endmacro() # copy from build directory to final resting places in exports directory simple_copy_recipe("omp.h" "${build_dir}" "${export_cmn_dir}/include") +simple_copy_recipe("ompt.h" "${build_dir}" "${export_cmn_dir}/include") simple_copy_recipe("omp_lib.h" "${build_dir}" "${export_cmn_dir}/include") simple_copy_recipe("omp_lib.f" "${build_dir}" "${export_cmn_dir}/include") simple_copy_recipe("omp_lib.f90" "${build_dir}" "${export_cmn_dir}/include") @@ -804,6 +838,7 @@ # libiomp.rc : ev-flags += -D KMP_FILE=$(lib_file) set_source_files_properties(libiomp.rc PROPERTIES COMPILE_DEFINITIONS "-D KMP_FILE=${lib_file}") expand_vars_recipe(omp.h) +expand_vars_recipe(ompt.h) expand_vars_recipe(omp_lib.h) expand_vars_recipe(omp_lib.f) expand_vars_recipe(omp_lib.f90) @@ -833,6 +868,11 @@ endif() say("Build : ${build}") say("Stats-Gathering : ${stats}") +say("OMPT-support : ${ompt_support}") +if(${OMPT_SUPPORT}) + say("OMPT-blame : ${ompt_blame}") + say("OMPT-trace : ${ompt_trace}") +endif() say("Use build.pl rules : ${USE_BUILDPL_RULES}") say("Adaptive locks : ${USE_ADAPTIVE_LOCKS}") say("Use predefined linker flags : 
${USE_PREDEFINED_LINKER_FLAGS}") Index: runtime/cmake/Definitions.cmake =================================================================== --- runtime/cmake/Definitions.cmake +++ runtime/cmake/Definitions.cmake @@ -100,6 +100,21 @@ else() append_definitions("-D KMP_STATS_ENABLED=0") endif() + if(${OMPT_SUPPORT}) + append_definitions("-D OMPT_SUPPORT=1") + else() + append_definitions("-D OMPT_SUPPORT=0") + endif() + if(${OMPT_BLAME}) + append_definitions("-D OMPT_BLAME=1") + else() + append_definitions("-D OMPT_BLAME=0") + endif() + if(${OMPT_TRACE}) + append_definitions("-D OMPT_TRACE=1") + else() + append_definitions("-D OMPT_TRACE=0") + endif() # OpenMP version flags set(have_omp_50 0) Index: runtime/cmake/SourceFiles.cmake =================================================================== --- runtime/cmake/SourceFiles.cmake +++ runtime/cmake/SourceFiles.cmake @@ -32,6 +32,9 @@ append_c_source_file("kmp_ftn_cdecl.c") append_c_source_file("kmp_ftn_extra.c") append_c_source_file("kmp_version.c") + if(${OMPT_SUPPORT}) + append_c_source_file("ompt-general.c") + endif() if(${STUBS_LIBRARY}) append_c_source_file("kmp_stub.c") else() Index: runtime/src/exports_so.txt =================================================================== --- runtime/src/exports_so.txt +++ runtime/src/exports_so.txt @@ -21,6 +21,8 @@ # "Normal" symbols. # omp_*; # Standard OpenMP functions. + ompt_initialize; # OMPT initialization interface + ompt_control; # OMPT control interface ompc_*; # omp.h renames some standard functions to ompc_*. kmp_*; # Intel extensions. kmpc_*; # Intel extensions. 
Index: runtime/src/include/30/ompt.h.var =================================================================== --- /dev/null +++ runtime/src/include/30/ompt.h.var @@ -0,0 +1,472 @@ +/* + * include/30/ompt.h.var + */ + +#ifndef __OMPT__ +#define __OMPT__ + +/***************************************************************************** + * system include files + *****************************************************************************/ + +#include <stdint.h> + + + +/***************************************************************************** + * iteration macros + *****************************************************************************/ + +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_state) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_idle_frame) \ + macro (ompt_get_task_frame) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_id) \ + macro (ompt_get_parallel_team_size) \ + macro (ompt_get_task_id) \ + macro (ompt_get_thread_id) + +#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ + macro (omp_idle) \ + macro (omp_overhead) \ + macro (omp_barrier_wait) \ + macro (omp_task_wait) \ + macro (omp_mutex_wait) + +#define FOREACH_OMPT_STATE(macro) \ + \ + /* first */ \ + macro (ompt_state_first, 0x71) /* initial enumeration state */ \ + \ + /* work states (0..15) */ \ + macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ + macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ + macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ + \ + /* idle (16..31) */ \ + macro (ompt_state_idle, 0x10) /* waiting for work */ \ + \ + /* overhead states (32..63) */ \ + macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ + \ + /* barrier wait states (64..79) */ \ + macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ + macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ + macro (ompt_state_wait_barrier_explicit,
0x42) /* explicit barrier */ \ + \ + /* task wait states (80..95) */ \ + macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ + macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ + \ + /* mutex wait states (96..111) */ \ + macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ + macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ + macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ + macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ + macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ + macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \ + \ + /* misc (112..127) */ \ + macro (ompt_state_undefined, 0x70) /* undefined thread state */ + + +#define FOREACH_OMPT_EVENT(macro) \ + \ + /*--- Mandatory Events ---*/ \ + macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ + macro (ompt_event_parallel_end, ompt_parallel_callback_t, 2) /* parallel end */ \ + \ + macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ + macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ + \ + macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ + macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ + \ + macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ + \ + macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ + \ + /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ + macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ + macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ + \ + macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ + macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ + \ + 
macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ + macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ + \ + macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ + macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ + \ + macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ + macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ + macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \ + \ + macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ + \ + macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ + \ + /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ + macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ + macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ + \ + macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ + macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ + \ + macro (ompt_event_task_switch, ompt_task_switch_callback_t, 26) /* task switch */ \ + \ + macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ + macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ + \ + macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ + macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ + \ + macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ + macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at 
single end */ \ + \ + macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ + macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ + \ + macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ + macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ + \ + macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ + macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ + \ + macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \ + macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ + \ + macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ + macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ + \ + macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ + macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ + \ + macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ + \ + macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ + macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ + macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ + macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ + macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ + \ + macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ + macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ + macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ + macro 
(ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ + macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ + macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ + \ + macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ + macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ + \ + macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ + macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ + \ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + + + +/***************************************************************************** + * data types + *****************************************************************************/ + +/*--------------------- + * identifiers + *---------------------*/ + +typedef uint64_t ompt_thread_id_t; +#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_task_id_t; +#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_parallel_id_t; +#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_wait_id_t; +#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ + + +/*--------------------- + * ompt_frame_t + *---------------------*/ + +typedef struct ompt_frame_s { + void *exit_runtime_frame; /* next frame is user code */ + void *reenter_runtime_frame; /* previous frame is user code */ +} ompt_frame_t; + + +/***************************************************************************** + * enumerations for thread states and runtime events + *****************************************************************************/ + +/*--------------------- + * runtime states + *---------------------*/ + +typedef enum { +#define ompt_state_macro(state, code) state = code, + 
FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +} ompt_state_t; + + +/*--------------------- + * runtime events + *---------------------*/ + +typedef enum { +#define ompt_event_macro(event, callback, eventid) event = eventid, + FOREACH_OMPT_EVENT(ompt_event_macro) +#undef ompt_event_macro +} ompt_event_t; + + +/*--------------------- + * set callback results + *---------------------*/ +typedef enum { + ompt_set_result_registration_error = 0, + ompt_set_result_event_may_occur_no_callback = 1, + ompt_set_result_event_never_occurs = 2, + ompt_set_result_event_may_occur_callback_some = 3, + ompt_set_result_event_may_occur_callback_always = 4, +} ompt_set_result_t; + + + +/***************************************************************************** + * callback signatures + *****************************************************************************/ + +/* initialization */ +typedef void (*ompt_interface_fn_t)(void); + +typedef ompt_interface_fn_t (*ompt_function_lookup_t)( + const char * /* entry point to look up */ +); + +/* threads */ +typedef void (*ompt_thread_callback_t) ( + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef enum { + ompt_thread_initial = 1, // start the enumeration at 1 + ompt_thread_worker = 2, + ompt_thread_other = 3 +} ompt_thread_type_t; + +typedef void (*ompt_thread_type_callback_t) ( + ompt_thread_type_t thread_type, /* type of thread */ + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef void (*ompt_wait_callback_t) ( + ompt_wait_id_t wait_id /* wait id */ +); + +/* parallel and workshares */ +typedef void (*ompt_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_new_workshare_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t parent_task_id, /* id of parent task */ + void *workshare_function /* pointer to outlined function */ +); + +typedef void 
(*ompt_new_parallel_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data of parent task */ + ompt_parallel_id_t parallel_id, /* id of parallel region */ + uint32_t requested_team_size, /* number of threads in team */ + void *parallel_function /* pointer to outlined function */ +); + +/* tasks */ +typedef void (*ompt_task_callback_t) ( + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_task_switch_callback_t) ( + ompt_task_id_t suspended_task_id, /* tool data for suspended task */ + ompt_task_id_t resumed_task_id /* tool data for resumed task */ +); + +typedef void (*ompt_new_task_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data for parent task */ + ompt_task_id_t new_task_id, /* id of created task */ + void *task_function /* pointer to outlined function */ +); + +/* program */ +typedef void (*ompt_control_callback_t) ( + uint64_t command, /* command of control call */ + uint64_t modifier /* modifier of control call */ +); + +typedef void (*ompt_callback_t)(void); + + +/**************************************************************************** + * ompt API + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define OMPT_API_FNTYPE(fn) fn##_t + +#define OMPT_API_FUNCTION(return_type, fn, args) \ + typedef return_type (*OMPT_API_FNTYPE(fn)) args + + + +/**************************************************************************** + * INQUIRY FUNCTIONS + ***************************************************************************/ + +/* state */ +OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( + ompt_wait_id_t *ompt_wait_id +)); + +/* thread */ +OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); + +OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); + +/* parallel region */ +OMPT_API_FUNCTION(ompt_parallel_id_t, 
ompt_get_parallel_id, ( + int ancestor_level +)); + +OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( + int ancestor_level +)); + +/* task */ +OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( + int depth +)); + +OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( + int depth +)); + + + +/**************************************************************************** + * PLACEHOLDERS FOR PERFORMANCE REPORTING + ***************************************************************************/ + +/* idle */ +OMPT_API_FUNCTION(void, omp_idle, ( + void +)); + +/* overhead */ +OMPT_API_FUNCTION(void, omp_overhead, ( + void +)); + +/* barrier wait */ +OMPT_API_FUNCTION(void, omp_barrier_wait, ( + void +)); + +/* task wait */ +OMPT_API_FUNCTION(void, omp_task_wait, ( + void +)); + +/* mutex wait */ +OMPT_API_FUNCTION(void, omp_mutex_wait, ( + void +)); + + + +/**************************************************************************** + * INITIALIZATION FUNCTIONS + ***************************************************************************/ + +/* initialization interface to be defined by tool */ +int ompt_initialize( + ompt_function_lookup_t ompt_fn_lookup, + const char *runtime_version, + unsigned int ompt_version +); + +typedef enum opt_init_mode_e { + ompt_init_mode_never = 0, + ompt_init_mode_false = 1, + ompt_init_mode_true = 2, + ompt_init_mode_always = 3 +} ompt_init_mode_t; + +OMPT_API_FUNCTION(int, ompt_set_callback, ( + ompt_event_t event, + ompt_callback_t callback +)); + +typedef enum ompt_set_callback_rc_e { /* non-standard */ + ompt_set_callback_error = 0, + ompt_has_event_no_callback = 1, + ompt_no_event_no_callback = 2, + ompt_has_event_may_callback = 3, + ompt_has_event_must_callback = 4, +} ompt_set_callback_rc_t; + + +OMPT_API_FUNCTION(int, ompt_get_callback, ( + ompt_event_t event, + ompt_callback_t *callback +)); + + + +/**************************************************************************** + * MISCELLANEOUS FUNCTIONS + 
***************************************************************************/ + +/* control */ +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp declare target +#endif +void ompt_control( + uint64_t command, + uint64_t modifier +); +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp end declare target +#endif + +/* state enumeration */ +OMPT_API_FUNCTION(int, ompt_enumerate_state, ( + int current_state, + int *next_state, + const char **next_state_name +)); + +#ifdef __cplusplus +}; +#endif + +#endif + Index: runtime/src/include/40/ompt.h.var =================================================================== --- /dev/null +++ runtime/src/include/40/ompt.h.var @@ -0,0 +1,472 @@ +/* + * include/40/ompt.h.var + */ + +#ifndef __OMPT__ +#define __OMPT__ + +/***************************************************************************** + * system include files + *****************************************************************************/ + +#include <stdint.h> + + + +/***************************************************************************** + * iteration macros + *****************************************************************************/ + +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_state) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_idle_frame) \ + macro (ompt_get_task_frame) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_id) \ + macro (ompt_get_parallel_team_size) \ + macro (ompt_get_task_id) \ + macro (ompt_get_thread_id) + +#define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ + macro (omp_idle) \ + macro (omp_overhead) \ + macro (omp_barrier_wait) \ + macro (omp_task_wait) \ + macro (omp_mutex_wait) + +#define FOREACH_OMPT_STATE(macro) \ + \ + /* first */ \ + macro (ompt_state_first, 0x71) /* initial enumeration state */ \ + \ + /* work states (0..15) */ \ + macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ + macro (ompt_state_work_parallel, 0x01) /* working
within parallel */ \ + macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ + \ + /* idle (16..31) */ \ + macro (ompt_state_idle, 0x10) /* waiting for work */ \ + \ + /* overhead states (32..63) */ \ + macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ + \ + /* barrier wait states (64..79) */ \ + macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ + macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ + macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ + \ + /* task wait states (80..95) */ \ + macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ + macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ + \ + /* mutex wait states (96..111) */ \ + macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ + macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ + macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ + macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ + macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ + macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) 
*/ \ + \ + /* misc (112..127) */ \ + macro (ompt_state_undefined, 0x70) /* undefined thread state */ + + +#define FOREACH_OMPT_EVENT(macro) \ + \ + /*--- Mandatory Events ---*/ \ + macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ + macro (ompt_event_parallel_end, ompt_parallel_callback_t, 2) /* parallel end */ \ + \ + macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ + macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ + \ + macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ + macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ + \ + macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ + \ + macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ + \ + /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ + macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ + macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ + \ + macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ + macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ + \ + macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ + macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ + \ + macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ + macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ + \ + macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ + macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ + macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \ + 
\ + macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic release */ \ + \ + macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ + \ + /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ + macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ + macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ + \ + macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ + macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ + \ + macro (ompt_event_task_switch, ompt_task_switch_callback_t, 26) /* task switch */ \ + \ + macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ + macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ + \ + macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ + macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ + \ + macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ + macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ + \ + macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ + macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ + \ + macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ + macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ + \ + macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ + macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ + \ + macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at 
barrier begin */ \ + macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ + \ + macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ + macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ + \ + macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ + macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ + \ + macro (ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ + \ + macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ + macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ + macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ + macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ + macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ + \ + macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ + macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ + macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ + macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ + macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ + macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ + \ + macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ + macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ + \ + macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ + macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ + \ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush 
*/ + + + +/***************************************************************************** + * data types + *****************************************************************************/ + +/*--------------------- + * identifiers + *---------------------*/ + +typedef uint64_t ompt_thread_id_t; +#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_task_id_t; +#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_parallel_id_t; +#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ + +typedef uint64_t ompt_wait_id_t; +#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ + + +/*--------------------- + * ompt_frame_t + *---------------------*/ + +typedef struct ompt_frame_s { + void *exit_runtime_frame; /* next frame is user code */ + void *reenter_runtime_frame; /* previous frame is user code */ +} ompt_frame_t; + + +/***************************************************************************** + * enumerations for thread states and runtime events + *****************************************************************************/ + +/*--------------------- + * runtime states + *---------------------*/ + +typedef enum { +#define ompt_state_macro(state, code) state = code, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +} ompt_state_t; + + +/*--------------------- + * runtime events + *---------------------*/ + +typedef enum { +#define ompt_event_macro(event, callback, eventid) event = eventid, + FOREACH_OMPT_EVENT(ompt_event_macro) +#undef ompt_event_macro +} ompt_event_t; + + +/*--------------------- + * set callback results + *---------------------*/ +typedef enum { + ompt_set_result_registration_error = 0, + ompt_set_result_event_may_occur_no_callback = 1, + ompt_set_result_event_never_occurs = 2, + ompt_set_result_event_may_occur_callback_some = 3, + ompt_set_result_event_may_occur_callback_always = 4, +} ompt_set_result_t; + + + 
+/***************************************************************************** + * callback signatures + *****************************************************************************/ + +/* initialization */ +typedef void (*ompt_interface_fn_t)(void); + +typedef ompt_interface_fn_t (*ompt_function_lookup_t)( + const char * /* entry point to look up */ +); + +/* threads */ +typedef void (*ompt_thread_callback_t) ( + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef enum { + ompt_thread_initial = 1, // start the enumeration at 1 + ompt_thread_worker = 2, + ompt_thread_other = 3 +} ompt_thread_type_t; + +typedef void (*ompt_thread_type_callback_t) ( + ompt_thread_type_t thread_type, /* type of thread */ + ompt_thread_id_t thread_id /* ID of thread */ +); + +typedef void (*ompt_wait_callback_t) ( + ompt_wait_id_t wait_id /* wait id */ +); + +/* parallel and workshares */ +typedef void (*ompt_parallel_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_new_workshare_callback_t) ( + ompt_parallel_id_t parallel_id, /* id of parallel region */ + ompt_task_id_t parent_task_id, /* id of parent task */ + void *workshare_function /* pointer to outlined function */ +); + +typedef void (*ompt_new_parallel_callback_t) ( + ompt_task_id_t parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data of parent task */ + ompt_parallel_id_t parallel_id, /* id of parallel region */ + uint32_t requested_team_size, /* number of threads in team */ + void *parallel_function /* pointer to outlined function */ +); + +/* tasks */ +typedef void (*ompt_task_callback_t) ( + ompt_task_id_t task_id /* id of task */ +); + +typedef void (*ompt_task_switch_callback_t) ( + ompt_task_id_t suspended_task_id, /* tool data for suspended task */ + ompt_task_id_t resumed_task_id /* tool data for resumed task */ +); + +typedef void (*ompt_new_task_callback_t) ( + ompt_task_id_t 
parent_task_id, /* id of parent task */ + ompt_frame_t *parent_task_frame, /* frame data for parent task */ + ompt_task_id_t new_task_id, /* id of created task */ + void *task_function /* pointer to outlined function */ +); + +/* program */ +typedef void (*ompt_control_callback_t) ( + uint64_t command, /* command of control call */ + uint64_t modifier /* modifier of control call */ +); + +typedef void (*ompt_callback_t)(void); + + +/**************************************************************************** + * ompt API + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define OMPT_API_FNTYPE(fn) fn##_t + +#define OMPT_API_FUNCTION(return_type, fn, args) \ + typedef return_type (*OMPT_API_FNTYPE(fn)) args + + + +/**************************************************************************** + * INQUIRY FUNCTIONS + ***************************************************************************/ + +/* state */ +OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( + ompt_wait_id_t *ompt_wait_id +)); + +/* thread */ +OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); + +OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); + +/* parallel region */ +OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( + int ancestor_level +)); + +OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( + int ancestor_level +)); + +/* task */ +OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( + int depth +)); + +OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( + int depth +)); + + + +/**************************************************************************** + * PLACEHOLDERS FOR PERFORMANCE REPORTING + ***************************************************************************/ + +/* idle */ +OMPT_API_FUNCTION(void, omp_idle, ( + void +)); + +/* overhead */ +OMPT_API_FUNCTION(void, omp_overhead, ( + void +)); + +/* barrier wait */ +OMPT_API_FUNCTION(void, omp_barrier_wait, ( + void 
+)); + +/* task wait */ +OMPT_API_FUNCTION(void, omp_task_wait, ( + void +)); + +/* mutex wait */ +OMPT_API_FUNCTION(void, omp_mutex_wait, ( + void +)); + + + +/**************************************************************************** + * INITIALIZATION FUNCTIONS + ***************************************************************************/ + +/* initialization interface to be defined by tool */ +int ompt_initialize( + ompt_function_lookup_t ompt_fn_lookup, + const char *runtime_version, + unsigned int ompt_version +); + +typedef enum ompt_init_mode_e { + ompt_init_mode_never = 0, + ompt_init_mode_false = 1, + ompt_init_mode_true = 2, + ompt_init_mode_always = 3 +} ompt_init_mode_t; + +OMPT_API_FUNCTION(int, ompt_set_callback, ( + ompt_event_t event, + ompt_callback_t callback +)); + +typedef enum ompt_set_callback_rc_e { /* non-standard */ + ompt_set_callback_error = 0, + ompt_has_event_no_callback = 1, + ompt_no_event_no_callback = 2, + ompt_has_event_may_callback = 3, + ompt_has_event_must_callback = 4, +} ompt_set_callback_rc_t; + + +OMPT_API_FUNCTION(int, ompt_get_callback, ( + ompt_event_t event, + ompt_callback_t *callback +)); + + + +/**************************************************************************** + * MISCELLANEOUS FUNCTIONS + ***************************************************************************/ + +/* control */ +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp declare target +#endif +void ompt_control( + uint64_t command, + uint64_t modifier +); +#if defined(_OPENMP) && (_OPENMP >= 201307) +#pragma omp end declare target +#endif + +/* state enumeration */ +OMPT_API_FUNCTION(int, ompt_enumerate_state, ( + int current_state, + int *next_state, + const char **next_state_name +)); + +#ifdef __cplusplus +}; +#endif + +#endif + Index: runtime/src/kmp.h =================================================================== --- runtime/src/kmp.h +++ runtime/src/kmp.h @@ -108,6 +108,10 @@ # pragma weak clock_gettime #endif +#if
OMPT_SUPPORT +#include "ompt-internal.h" +#endif + /*Select data placement in NUMA memory */ #define NO_FIRST_TOUCH 0 #define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */ @@ -2026,6 +2030,9 @@ kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies #endif +#if OMPT_SUPPORT + ompt_task_info_t ompt_task_info; +#endif #if KMP_HAVE_QUAD _Quad td_dummy; // Align structure 16-byte size since allocated just before kmp_task_t #else @@ -2188,6 +2195,11 @@ /* TODO the first serial team should actually be stored in the info_t * structure. this will help reduce initial allocation overhead */ KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/ + +#if OMPT_SUPPORT + ompt_thread_info_t ompt_thread_info; +#endif + /* The following are also read by the master during reinit */ struct common_table *th_pri_common; @@ -2322,6 +2334,12 @@ int t_nproc; // number of threads in team microtask_t t_pkfn; launch_t t_invoke; // procedure to launch the microtask + +#if OMPT_SUPPORT + ompt_team_info_t ompt_team_info; + ompt_lw_taskteam_t *ompt_serialized_team_info; +#endif + #if KMP_ARCH_X86 || KMP_ARCH_X86_64 kmp_int8 t_fp_control_saved; kmp_int8 t_pad2b; @@ -3091,6 +3109,9 @@ extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, kmp_routine_entry_t task_entry ); +#if OMPT_SUPPORT +extern void __kmp_task_init_ompt( kmp_taskdata_t * task, int tid ); +#endif extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task ); Index: runtime/src/kmp_tasking.c =================================================================== --- runtime/src/kmp_tasking.c +++ runtime/src/kmp_tasking.c @@ -18,6 +18,9 @@ #include "kmp_itt.h" #include "kmp_wait_release.h" +#if OMPT_SUPPORT +#include 
"ompt-specific.h" +#endif /* ------------------------------------------------------------------------ */ @@ -703,6 +706,25 @@ #endif // TASK_UNUSED +#if OMPT_SUPPORT +//---------------------------------------------------------------------------------------------------- +// __kmp_task_init_ompt: +// Initialize OMPT fields maintained by a task. The serial task is initialized before +// ompt_initialize is called, so at that point we don't know whether OMPT will be used. +// This function provides the support needed to initialize OMPT for the serial task +// after the fact. Fields are assigned one by one so the code is valid in both C and C++. + +void +__kmp_task_init_ompt( kmp_taskdata_t * task, int tid ) +{ + task->ompt_task_info.task_id = __ompt_task_id_new(tid); + task->ompt_task_info.function = NULL; + task->ompt_task_info.frame.exit_runtime_frame = NULL; + task->ompt_task_info.frame.reenter_runtime_frame = NULL; +} +#endif + + //---------------------------------------------------------------------------------------------------- // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread // Index: runtime/src/makefile.mk =================================================================== --- runtime/src/makefile.mk +++ runtime/src/makefile.mk @@ -94,6 +94,9 @@ CXXFLAGS=$(subst $(space),_,$(CXXFLAGS)) FFLAGS=$(subst $(space),_,$(FFLAGS)) LDFLAGS=$(subst $(space),_,$(LDFLAGS)) + OMPT_SUPPORT=$(OMPT_SUPPORT) + OMPT_BLAME=$(OMPT_BLAME) + OMPT_TRACE=$(OMPT_TRACE) endef # And check it. include $(tools_dir)src/common-checks.mk @@ -689,12 +692,25 @@ # -------------------------------------------------------------------------------------------------- # Files. 
# -------------------------------------------------------------------------------------------------- +ifeq "$(OMPT_SUPPORT)" "on" + ompt_items = ompt-general + cpp-flags += -D OMPT_SUPPORT=1 + + ifeq "$(OMPT_BLAME)" "on" + cpp-flags += -D OMPT_BLAME=1 + endif + + ifeq "$(OMPT_TRACE)" "on" + cpp-flags += -D OMPT_TRACE=1 + endif +endif # Library files. These files participate in all kinds of library. lib_c_items := \ kmp_ftn_cdecl \ kmp_ftn_extra \ kmp_version \ + $(ompt_items) \ $(empty) lib_cpp_items := lib_asm_items := @@ -858,7 +874,7 @@ out_mod_files = \ $(addprefix $(out_ptf_dir)include/,omp_lib.mod omp_lib_kinds.mod) out_cmn_files = \ - $(addprefix $(out_cmn_dir)include/,omp.h omp_lib.h omp_lib.f omp_lib.f90) \ + $(addprefix $(out_cmn_dir)include/,omp.h ompt.h omp_lib.h omp_lib.f omp_lib.f90) \ $(addprefix $(out_cmn_dir)include_compat/,iomp.h) ifneq "$(out_lib_fat_dir)" "" out_lib_fat_files = $(addprefix $(out_lib_fat_dir),$(lib_file) $(imp_file)) Index: runtime/src/ompt-event-specific.h =================================================================== --- /dev/null +++ runtime/src/ompt-event-specific.h @@ -0,0 +1,144 @@ +#ifndef __OMPT_EVENT_SPECIFIC_H__ +#define __OMPT_EVENT_SPECIFIC_H__ + +/****************************************************************************** + * File: ompt-event-specific.h + * + * Description: + * + * specify which of the OMPT events are implemented by this runtime system + * and the level of their implementation by a runtime system. + *****************************************************************************/ + +#define _ompt_tokenpaste_helper(x,y) x ## y +#define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y) +#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented) + + +/*---------------------------------------------------------------------------- + | Specify whether an event may occur or not, and whether event callbacks + | never, sometimes, or always occur. 
+ | + | The values for these constants are defined in section 6.1.2 of + | the OMPT TR. They are exposed to tools through ompt_set_callback. + +--------------------------------------------------------------------------*/ + +#define ompt_event_NEVER ompt_set_result_event_never_occurs +#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback +#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some +#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always + +#if OMPT_TRACE +#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS +#else +#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED +#endif + +#if OMPT_BLAME +#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS +#else +#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED +#endif + +/*---------------------------------------------------------------------------- + | Mandatory Events + +--------------------------------------------------------------------------*/ + +#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_control_implemented ompt_event_MAY_ALWAYS + +#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS + + +/*---------------------------------------------------------------------------- + | Optional Events (blame shifting) + +--------------------------------------------------------------------------*/ + +#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME + +#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME +#define 
ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME + +#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_nest_lock_last_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME + + +/*---------------------------------------------------------------------------- + | Optional Events (synchronous events) + +--------------------------------------------------------------------------*/ + +#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_task_switch_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define 
ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_taskwait_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_taskwait_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_release_nest_lock_prev_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_acquired_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_acquired_nest_lock_first_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_acquired_nest_lock_next_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE + +#define ompt_event_init_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_init_nest_lock_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_destroy_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_destroy_nest_lock_implemented ompt_event_UNIMPLEMENTED + +#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED + +#endif Index: 
runtime/src/ompt-general.c =================================================================== --- /dev/null +++ runtime/src/ompt-general.c @@ -0,0 +1,391 @@ +/***************************************************************************** + * system include files + ****************************************************************************/ + +#include <assert.h> + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + + +/***************************************************************************** + * ompt include files + ****************************************************************************/ + +#include "ompt-internal.h" +#include "ompt-specific.c" + + + +/***************************************************************************** + * macros + ****************************************************************************/ + +#define ompt_get_callback_success 1 +#define ompt_get_callback_failure 0 + +#define no_tool_present 0 + +#define OMPT_API_ROUTINE static + + + +/***************************************************************************** + * types + ****************************************************************************/ + +typedef struct { + const char *state_name; + ompt_state_t state_id; +} ompt_state_info_t; + + + +/***************************************************************************** + * global variables + ****************************************************************************/ + +ompt_status_t ompt_status = ompt_status_ready; + + +ompt_state_info_t ompt_state_info[] = { +#define ompt_state_macro(state, code) { # state, state }, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +}; + + +ompt_callbacks_t ompt_callbacks; + + + +/***************************************************************************** + * forward declarations + ****************************************************************************/ + +static ompt_interface_fn_t ompt_fn_lookup(const char *s); + + 
+/***************************************************************************** + * state + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state, + const char **next_state_name) +{ + static const int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); + int i = 0; + + for (i = 0; i < len - 1; i++) { + if (ompt_state_info[i].state_id == current_state) { + *next_state = ompt_state_info[i+1].state_id; + *next_state_name = ompt_state_info[i+1].state_name; + return 1; + } + } + + return 0; +} + + + +/***************************************************************************** + * callbacks + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) +{ + switch (evid) { + +#define ompt_event_macro(event_name, callback_type, event_id) \ + case event_name: \ + if (ompt_event_implementation_status(event_name)) { \ + ompt_callbacks.ompt_callback(event_name) = (callback_type) cb; \ + } \ + return ompt_event_implementation_status(event_name); + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro + + default: return ompt_set_result_registration_error; + } +} + + +OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) +{ + switch (evid) { + +#define ompt_event_macro(event_name, callback_type, event_id) \ + case event_name: \ + if (ompt_event_implementation_status(event_name)) { \ + ompt_callback_t mycb = \ + (ompt_callback_t) ompt_callbacks.ompt_callback(event_name); \ + if (mycb) { \ + *cb = mycb; \ + return ompt_get_callback_success; \ + } \ + } \ + return ompt_get_callback_failure; + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro + + default: return ompt_get_callback_failure; + } +} + + + +/***************************************************************************** + * initialization/finalization + 
****************************************************************************/ + +_OMP_EXTERN __attribute__ (( weak )) +int ompt_initialize(ompt_function_lookup_t ompt_fn_lookup, const char *version, + unsigned int ompt_version) +{ + return no_tool_present; +} + +enum tool_setting_e { + omp_tool_error, + omp_tool_unset, + omp_tool_disabled, + omp_tool_enabled +}; + +void ompt_init() +{ + static int ompt_initialized = 0; + + if (ompt_initialized) return; + + const char *ompt_env_var = getenv("OMP_TOOL"); + tool_setting_e tool_setting = omp_tool_error; + + if (!ompt_env_var || !strcmp(ompt_env_var, "")) + tool_setting = omp_tool_unset; + else if (!strcmp(ompt_env_var, "disabled")) + tool_setting = omp_tool_disabled; + else if (!strcmp(ompt_env_var, "enabled")) + tool_setting = omp_tool_enabled; + + switch(tool_setting) { + case omp_tool_disabled: + ompt_status = ompt_status_disabled; + break; + + case omp_tool_unset: + case omp_tool_enabled: + { + const char *runtime_version = __ompt_get_runtime_version_internal(); + int ompt_init_val = + ompt_initialize(ompt_fn_lookup, runtime_version, OMPT_VERSION); + + if (ompt_init_val) { + ompt_status = ompt_status_track_callback; + __ompt_init_internal(); + } + break; + } + + case omp_tool_error: + fprintf(stderr, + "Warning: OMP_TOOL has invalid value \"%s\".\n" + " legal values are (NULL,\"\",\"disabled\"," + "\"enabled\").\n", ompt_env_var); + break; + } + + ompt_initialized = 1; +} + + +void ompt_fini() +{ + if (ompt_status == ompt_status_track_callback) { + if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) { + ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)(); + } + } + + ompt_status = ompt_status_disabled; +} + + + +/***************************************************************************** + * parallel regions + ****************************************************************************/ + +OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) +{ + return 
__ompt_get_parallel_id_internal(ancestor_level); +} + + +OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) +{ + return __ompt_get_parallel_team_size_internal(ancestor_level); +} + + +OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) +{ + return __ompt_get_parallel_function_internal(ancestor_level); +} + + +OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) +{ + ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id); + + if (thread_state == ompt_state_undefined) { + thread_state = ompt_state_work_serial; + } + + return thread_state; +} + + + +/***************************************************************************** + * threads + ****************************************************************************/ + + +OMPT_API_ROUTINE void *ompt_get_idle_frame() +{ + return __ompt_get_idle_frame_internal(); +} + + + +/***************************************************************************** + * tasks + ****************************************************************************/ + + +OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) +{ + return __ompt_get_thread_id_internal(); +} + +OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) +{ + return __ompt_get_task_id_internal(depth); +} + + +OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) +{ + return __ompt_get_task_frame_internal(depth); +} + + +OMPT_API_ROUTINE void *ompt_get_task_function(int depth) +{ + return __ompt_get_task_function_internal(depth); +} + + +/***************************************************************************** + * placeholders + ****************************************************************************/ + + +OMPT_API_ROUTINE void omp_idle(void) +{ + // this function is a placeholder used to represent the calling context of + // idle OpenMP worker threads. It is not meant to be invoked. 
+ assert(0); +} + + +OMPT_API_ROUTINE void omp_overhead(void) +{ + // this function is a placeholder used to represent the OpenMP context of + // threads working in the OpenMP runtime. It is not meant to be invoked. + assert(0); +} + + +OMPT_API_ROUTINE void omp_barrier_wait(void) +{ + // this function is a placeholder used to represent the OpenMP context of + // threads waiting for a barrier in the OpenMP runtime. It is not meant + // to be invoked. + assert(0); +} + + +OMPT_API_ROUTINE void omp_task_wait(void) +{ + // this function is a placeholder used to represent the OpenMP context of + // threads waiting for a task in the OpenMP runtime. It is not meant + // to be invoked. + assert(0); +} + + +OMPT_API_ROUTINE void omp_mutex_wait(void) +{ + // this function is a placeholder used to represent the OpenMP context of + // threads waiting for a mutex in the OpenMP runtime. It is not meant + // to be invoked. + assert(0); +} + + +/***************************************************************************** + * compatibility + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_get_ompt_version() +{ + return OMPT_VERSION; +} + + + +/***************************************************************************** + * application-facing API + ****************************************************************************/ + + +/*---------------------------------------------------------------------------- + | control + ---------------------------------------------------------------------------*/ + +_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) +{ + if (ompt_status == ompt_status_track_callback && + ompt_callbacks.ompt_callback(ompt_event_control)) { + ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier); + } +} + + + +/***************************************************************************** + * API inquiry for tool + 
****************************************************************************/ + +static ompt_interface_fn_t ompt_fn_lookup(const char *s) +{ + +#define ompt_interface_fn(fn) \ + if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn; + + FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) + + FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn) + + return (ompt_interface_fn_t) 0; +} Index: runtime/src/ompt-internal.h =================================================================== --- /dev/null +++ runtime/src/ompt-internal.h @@ -0,0 +1,85 @@ +#ifndef __OMPT_INTERNAL_H__ +#define __OMPT_INTERNAL_H__ + +#include "ompt.h" +#include "ompt-event-specific.h" + +#define OMPT_VERSION 1 + +#define _OMP_EXTERN extern "C" + + + +#define ompt_callback(e) e ## _callback + +/* track and track_callback share a bit so that one can test whether either is + * set by anding a bit. + */ +typedef enum { + ompt_status_disabled = 0x0, + ompt_status_ready = 0x1, + ompt_status_track = 0x2, + ompt_status_track_callback = 0x6, +} ompt_status_t; + + +typedef struct ompt_callbacks_s { +#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event); + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_callbacks_t; + + + +typedef struct { + ompt_frame_t frame; + void* function; + ompt_task_id_t task_id; +} ompt_task_info_t; + + +typedef struct { + ompt_parallel_id_t parallel_id; + void *microtask; +} ompt_team_info_t; + + +typedef struct ompt_lw_taskteam_s { + ompt_team_info_t ompt_team_info; + ompt_task_info_t ompt_task_info; + struct ompt_lw_taskteam_s *parent; +} ompt_lw_taskteam_t; + + +typedef struct ompt_parallel_info_s { + ompt_task_id_t parent_task_id; /* id of parent task */ + ompt_parallel_id_t parallel_id; /* id of parallel region */ + ompt_frame_t *parent_task_frame; /* frame data of parent task */ + void *parallel_function; /* pointer to outlined function */ +} ompt_parallel_info_t; + + +typedef struct { + ompt_state_t state; + ompt_wait_id_t wait_id; 
+ void *idle_frame; +} ompt_thread_info_t; + + +extern ompt_status_t ompt_status; +extern ompt_callbacks_t ompt_callbacks; + +#ifdef __cplusplus +extern "C" { +#endif + +void ompt_init(void); +void ompt_fini(void); + +#ifdef __cplusplus +}; +#endif + +#endif Index: runtime/src/ompt-specific.h =================================================================== --- /dev/null +++ runtime/src/ompt-specific.h @@ -0,0 +1,49 @@ +#ifndef OMPT_SPECIFIC_H +#define OMPT_SPECIFIC_H + +#include "kmp.h" + +void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid); +void __ompt_thread_assign_wait_id(void *variable); + +void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, + int gtid, void *microtask, + ompt_parallel_id_t ompt_pid); + +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr); + +ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(kmp_info_t *thr); + +ompt_parallel_id_t __ompt_parallel_id_new(int gtid); +ompt_task_id_t __ompt_task_id_new(int gtid); + +ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); + +ompt_task_info_t *__ompt_get_taskinfo(int depth); + +inline kmp_info_t * +ompt_get_thread_gtid(int gtid) +{ + return (gtid >= 0) ? 
__kmp_thread_from_gtid(gtid) : NULL; +} + +inline kmp_info_t * +ompt_get_thread() +{ + int gtid = __kmp_gtid_get_specific(); + return ompt_get_thread_gtid(gtid); +} + + +void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid); + +void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid); + + +int __ompt_get_parallel_team_size_internal(int ancestor_level); + +ompt_task_id_t __ompt_get_task_id_internal(int depth); + +ompt_frame_t *__ompt_get_task_frame_internal(int depth); + +#endif Index: runtime/src/ompt-specific.c =================================================================== --- /dev/null +++ runtime/src/ompt-specific.c @@ -0,0 +1,374 @@ +//****************************************************************************** +// include files +//****************************************************************************** + +#include "kmp.h" +#include "ompt-internal.h" +#include "ompt-specific.h" + +//****************************************************************************** +// macros +//****************************************************************************** + +#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (((id) >= 0) ? (id) + 1 : 0)) + +#define LWT_FROM_TEAM(team) ((team)->t.ompt_serialized_team_info) + +#define OMPT_THREAD_ID_BITS 16 + +// 2013 08 24 - John Mellor-Crummey +// ideally, a thread should assign its own ids based on thread private data. +// however, the way the intel runtime reinitializes thread data structures +// when it creates teams makes it difficult to maintain persistent thread +// data. using a shared variable instead is simple. I leave it to intel to +// sort out how to implement a higher performance version in their runtime. + +// when using fetch_and_add to generate the IDs, there isn't any reason to waste +// bits for thread id. 
+#if 0 +#define NEXT_ID(id_ptr,tid) \ + ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid)) +#else +#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64(id_ptr)) +#endif + +//****************************************************************************** +// private operations +//****************************************************************************** + +//---------------------------------------------------------- +// traverse the team and task hierarchy +// note: __ompt_get_teaminfo and __ompt_get_taskinfo +// traverse the hierarchy similarly and need to be +// kept consistent +//---------------------------------------------------------- + +ompt_team_info_t * +__ompt_get_teaminfo(int depth, int *size) +{ + kmp_info_t *thr = ompt_get_thread(); + + if (thr) { + kmp_team *team = thr->th.th_team; + ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team); + + while(depth > 0) { + // next lightweight team (if any) + if (lwt) lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && team) team=team->t.t_parent; + + depth--; + } + + if (lwt) { + // lightweight teams have one task + if (size) *size = 1; + + // return team info for lightweight team + return &lwt->ompt_team_info; + } else if (team) { + // extract size from heavyweight team + if (size) *size = team->t.t_nproc; + + // return team info for heavyweight team + return &team->t.ompt_team_info; + } + } + + return NULL; +} + + +ompt_task_info_t * +__ompt_get_taskinfo(int depth) +{ + ompt_task_info_t *info = NULL; + kmp_info_t *thr = ompt_get_thread(); + + if (thr) { + kmp_taskdata_t *taskdata = thr->th.th_current_task; + ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team); + + while (depth > 0) { + // next lightweight team (if any) + if (lwt) lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && taskdata) { + taskdata = taskdata->td_parent; + if (taskdata) { + lwt = 
LWT_FROM_TEAM(taskdata->td_team); + } + } + depth--; + } + + if (lwt) { + info = &lwt->ompt_task_info; + } else if (taskdata) { + info = &taskdata->ompt_task_info; + } + } + + return info; +} + + + +//****************************************************************************** +// interface operations +//****************************************************************************** + +//---------------------------------------------------------- +// initialization support +//---------------------------------------------------------- + +void +__ompt_init_internal() +{ + if (ompt_status & ompt_status_track) { + // initialize initial thread for OMPT + kmp_info_t *root_thread = ompt_get_thread(); + __kmp_task_init_ompt( + root_thread->th.th_team->t.t_implicit_task_taskdata, 0); + __kmp_task_init_ompt( + root_thread->th.th_serial_team->t.t_implicit_task_taskdata, 0); + + // make mandatory callback for creation of initial thread + // this needs to occur here rather than in __kmp_register_root because + // __kmp_register_root is called before ompt_initialize + int gtid = __kmp_get_gtid(); + if (KMP_UBER_GTID(gtid)) { + // initialize the initial thread's idle frame and state + root_thread->th.ompt_thread_info.idle_frame = 0; + root_thread->th.ompt_thread_info.state = ompt_state_overhead; + if ((ompt_status == ompt_status_track_callback) && + ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { + __ompt_thread_begin(ompt_thread_initial, gtid); + } + root_thread->th.ompt_thread_info.state = ompt_state_work_serial; + } + } +} + + +//---------------------------------------------------------- +// thread support +//---------------------------------------------------------- + +ompt_parallel_id_t +__ompt_thread_id_new() +{ + static uint64_t ompt_thread_id = 1; + return NEXT_ID(&ompt_thread_id, 0); +} + +void +__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) +{ + ompt_callbacks.ompt_callback(ompt_event_thread_begin)( + thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); 
+} + + +void +__ompt_thread_end(ompt_thread_type_t thread_type, int gtid) +{ + ompt_callbacks.ompt_callback(ompt_event_thread_end)( + thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); +} + + +ompt_thread_id_t +__ompt_get_thread_id_internal() +{ + // FIXME + // until we have a better way of assigning ids, use __kmp_get_gtid + // since the return value might be negative, we need to test that before + // assigning it to an ompt_thread_id_t, which is unsigned. + int id = __kmp_get_gtid(); + assert(id >= 0); + + return GTID_TO_OMPT_THREAD_ID(id); +} + +//---------------------------------------------------------- +// state support +//---------------------------------------------------------- + +void +__ompt_thread_assign_wait_id(void *variable) +{ + int gtid = __kmp_gtid_get_specific(); + kmp_info_t *ti = ompt_get_thread_gtid(gtid); + + ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable; +} + +ompt_state_t +__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) +{ + kmp_info_t *ti = ompt_get_thread(); + + if (ti) { + if (ompt_wait_id) + *ompt_wait_id = ti->th.ompt_thread_info.wait_id; + return ti->th.ompt_thread_info.state; + } + return ompt_state_undefined; +} + +//---------------------------------------------------------- +// idle frame support +//---------------------------------------------------------- + +void * +__ompt_get_idle_frame_internal(void) +{ + kmp_info_t *ti = ompt_get_thread(); + return ti ? ti->th.ompt_thread_info.idle_frame : NULL; +} + + +//---------------------------------------------------------- +// parallel region support +//---------------------------------------------------------- + +ompt_parallel_id_t +__ompt_parallel_id_new(int gtid) +{ + static uint64_t ompt_parallel_id = 1; + return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0; +} + + +void * +__ompt_get_parallel_function_internal(int depth) +{ + ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); + void *function = info ? 
info->microtask : NULL; + return function; +} + + +ompt_parallel_id_t +__ompt_get_parallel_id_internal(int depth) +{ + ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); + ompt_parallel_id_t id = info ? info->parallel_id : 0; + return id; +} + + +int +__ompt_get_parallel_team_size_internal(int depth) +{ + // initialize the return value with the error value. + // if there is a team at the specified depth, the default + // value will be overwritten the size of that team. + int size = -1; + (void) __ompt_get_teaminfo(depth, &size); + return size; +} + + +//---------------------------------------------------------- +// lightweight task team support +//---------------------------------------------------------- + +void +__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, + int gtid, void *microtask, + ompt_parallel_id_t ompt_pid) +{ + lwt->ompt_team_info.parallel_id = ompt_pid; + lwt->ompt_team_info.microtask = microtask; + lwt->ompt_task_info.task_id = 0; + lwt->ompt_task_info.frame.reenter_runtime_frame = 0; + lwt->ompt_task_info.frame.exit_runtime_frame = 0; + lwt->ompt_task_info.function = NULL; + lwt->parent = 0; +} + + +void +__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) +{ + ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info; + lwt->parent = my_parent; + thr->th.th_team->t.ompt_serialized_team_info = lwt; +} + + +ompt_lw_taskteam_t * +__ompt_lw_taskteam_unlink(kmp_info_t *thr) +{ + ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; + if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; + return lwtask; +} + + +//---------------------------------------------------------- +// task support +//---------------------------------------------------------- + +ompt_task_id_t +__ompt_task_id_new(int gtid) +{ + static uint64_t ompt_task_id = 1; + return NEXT_ID(&ompt_task_id, gtid); +} + + +ompt_task_id_t +__ompt_get_task_id_internal(int depth) +{ + 
ompt_task_info_t *info = __ompt_get_taskinfo(depth); + ompt_task_id_t task_id = info ? info->task_id : 0; + return task_id; +} + + +void * +__ompt_get_task_function_internal(int depth) +{ + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + void *function = info ? info->function : NULL; + return function; +} + + +ompt_frame_t * +__ompt_get_task_frame_internal(int depth) +{ + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + ompt_frame_t *frame = info ? frame = &info->frame : NULL; + return frame; +} + + +//---------------------------------------------------------- +// team support +//---------------------------------------------------------- + +void +__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) +{ + team->t.ompt_team_info.parallel_id = ompt_pid; +} + + +//---------------------------------------------------------- +// runtime version support +//---------------------------------------------------------- + +const char * +__ompt_get_runtime_version_internal() +{ + return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]; +} Index: runtime/tools/build.pl =================================================================== --- runtime/tools/build.pl +++ runtime/tools/build.pl @@ -65,12 +65,15 @@ "omp-version" => { targets => "rtl", base => 0, parms => { 40 => "*", 30 => "" }, suffix => sub { $_[ 0 ]; } }, "coverage" => { targets => "rtl", base => 0, parms => { off => "*", on => "" }, suffix => sub { $_[ 0 ] eq "on" ? "c1" : "c0"; } }, "stats" => { targets => "rtl", base => 0, parms => { off => "*", on => "" }, suffix => sub { $_[ 0 ] eq "on" ? "s1" : "s0"; } }, + "ompt-support" => { targets => "rtl", base => 0, parms => { off => "*", on => "" }, suffix => sub { $_[ 0 ] eq "on" ? "ompt" : "" } }, + "ompt-blame" => { targets => "rtl", base => 0, parms => { off => "", on => "*" }, suffix => sub { $_[ 0 ] eq "on" ? 
"" : "no-ompt-blame" } }, + "ompt-trace" => { targets => "rtl", base => 0, parms => { off => "", on => "*" }, suffix => sub { $_[ 0 ] eq "on" ? "" : "no-ompt-trace" } }, }; my $synonyms = { "debug" => [ qw{ dbg debg } ], }; # This array specifies order of options to process, so it cannot be initialized with keys( %$opts ). -my @all_opts = qw{ target version lib-type link-type mode omp-version coverage stats }; +my @all_opts = qw{ target version lib-type link-type mode omp-version coverage stats ompt-support ompt-blame ompt-trace }; # This is the list of base options. my @base_opts = grep( $opts->{ $_ }->{ base } == 1, @all_opts ); # This is the list of extra options. @@ -271,6 +274,9 @@ "VERSION=" . $set->{ version }, "suffix=" . $suf, "stats=" . $set->{ stats }, + "OMPT_SUPPORT=" . $set->{ "ompt-support" }, + "OMPT_BLAME=" . $set->{ "ompt-blame" }, + "OMPT_TRACE=" . $set->{ "ompt-trace" }, @goals, ], build_dir => $build_dir