Index: runtime/CMakeLists.txt =================================================================== --- runtime/CMakeLists.txt +++ runtime/CMakeLists.txt @@ -321,12 +321,11 @@ # OMPT-support set(LIBOMP_OMPT_DEBUG FALSE CACHE BOOL "Trace OMPT initialization?") +#after testing: turn on ompt support for OpenMP 5.0 and higher (see commit 5e848176) set(LIBOMP_OMPT_SUPPORT FALSE CACHE BOOL "OMPT-support?") -set(LIBOMP_OMPT_BLAME TRUE CACHE BOOL - "OMPT-blame?") -set(LIBOMP_OMPT_TRACE TRUE CACHE BOOL - "OMPT-trace?") +set(LIBOMP_OMPT_OPTIONAL TRUE CACHE BOOL + "OMPT-optional?") if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_HAVE_OMPT_SUPPORT)) libomp_error_say("OpenMP Tools Interface requested but not available in this implementation") endif() @@ -396,8 +395,7 @@ libomp_say("Use ITT notify -- ${LIBOMP_USE_ITT_NOTIFY}") libomp_say("Use OMPT-support -- ${LIBOMP_OMPT_SUPPORT}") if(${LIBOMP_OMPT_SUPPORT}) - libomp_say("Use OMPT-blame -- ${LIBOMP_OMPT_BLAME}") - libomp_say("Use OMPT-trace -- ${LIBOMP_OMPT_TRACE}") + libomp_say("Use OMPT-optional -- ${LIBOMP_OMPT_OPTIONAL}") endif() libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}") libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}") Index: runtime/src/exports_so.txt =================================================================== --- runtime/src/exports_so.txt +++ runtime/src/exports_so.txt @@ -25,8 +25,7 @@ # # OMPT API # - ompt_tool; # OMPT initialization interface - ompt_control; # OMPT control interface + ompt_start_tool; # OMPT start interface # icc drops weak attribute at linking step without the following line: Annotate*; # TSAN annotation Index: runtime/src/include/50/omp.h.var =================================================================== --- runtime/src/include/50/omp.h.var +++ runtime/src/include/50/omp.h.var @@ -182,6 +182,23 @@ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); + /* OpenMP 5.0 Tool Control */ + typedef enum omp_control_tool_result_t { + omp_control_tool_notool = -2, + omp_control_tool_nocallback = -1, + omp_control_tool_success = 0, + omp_control_tool_ignored = 1 + } omp_control_tool_result_t; + + typedef enum omp_control_tool_t { + omp_control_tool_start = 1, + omp_control_tool_pause = 2, + omp_control_tool_flush = 3, + omp_control_tool_end = 4 + } omp_control_tool_t; + + extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); + # undef __KAI_KMPC_CONVENTION /* Warning: Index: runtime/src/include/50/omp_lib.h.var =================================================================== --- runtime/src/include/50/omp_lib.h.var +++ runtime/src/include/50/omp_lib.h.var @@ -29,6 +29,8 @@ integer, parameter :: kmp_size_t_kind = int_ptr_kind() integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() integer, parameter :: omp_lock_hint_kind = omp_integer_kind + integer, parameter :: omp_control_tool_kind = omp_integer_kind + integer, parameter :: omp_control_tool_result_kind = omp_integer_kind integer (kind=omp_integer_kind), parameter :: openmp_version = @LIBOMP_OMP_YEAR_MONTH@ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ @@ -57,6 +59,16 @@ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2 + integer 
(kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4 + + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1 + interface ! *** @@ -494,6 +506,13 @@ integer (kind=omp_lock_hint_kind), value :: hint end subroutine omp_init_nest_lock_with_hint + function omp_control_tool(command, modifier) bind(c) + import + integer (kind=omp_integer_kind) omp_control_tool + integer (kind=omp_control_tool_kind), value :: command + integer (kind=omp_control_tool_kind), value :: modifier + end function omp_control_tool + end interface !DIR$ IF DEFINED (__INTEL_OFFLOAD) Index: runtime/src/include/50/omp_lib.f.var =================================================================== --- runtime/src/include/50/omp_lib.f.var +++ runtime/src/include/50/omp_lib.f.var @@ -32,6 +32,8 @@ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() integer, parameter :: kmp_cancel_kind = omp_integer_kind integer, parameter :: omp_lock_hint_kind = omp_integer_kind + integer, parameter :: omp_control_tool_kind = omp_integer_kind + integer, parameter :: omp_control_tool_result_kind = omp_integer_kind end module omp_lib_kinds @@ -518,6 +520,13 @@ integer (kind=omp_lock_hint_kind) hint end subroutine omp_init_nest_lock_with_hint + function omp_control_tool(command, modifier) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_control_tool + integer (kind=omp_control_tool_kind) command + integer (kind=omp_control_tool_kind) modifier + end function omp_control_tool + end interface !dec$ if defined(_WIN32) @@ -563,6 +572,7 @@ !dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation !dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority +!dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool !dec$ attributes alias:'omp_init_lock' :: omp_init_lock !dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -643,6 +653,7 @@ !dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation !dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORTY' :: omp_get_max_task_priority +!dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock !dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -739,6 +750,7 @@ !dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock !dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock !dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock +!dec$ attributes alias:'omp_control_tool_'::omp_control_tool !dec$ attributes alias:'kmp_set_stacksize_'::kmp_set_stacksize !dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s @@ -818,6 +830,7 @@ !dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock !dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock !dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock +!dec$ attributes alias:'_omp_control_tool_'::omp_control_tool !dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize !dec$ attributes 
alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s Index: runtime/src/include/50/omp_lib.f90.var =================================================================== --- runtime/src/include/50/omp_lib.f90.var +++ runtime/src/include/50/omp_lib.f90.var @@ -28,6 +28,8 @@ integer, parameter :: kmp_affinity_mask_kind = c_intptr_t integer, parameter :: kmp_cancel_kind = omp_integer_kind integer, parameter :: omp_lock_hint_kind = omp_integer_kind + integer, parameter :: omp_control_tool_kind = omp_integer_kind + integer, parameter :: omp_control_tool_result_kind = omp_integer_kind end module omp_lib_kinds @@ -68,6 +70,16 @@ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4 + + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1 + interface ! *** @@ -519,6 +531,13 @@ integer (kind=omp_lock_hint_kind), value :: hint end subroutine omp_init_nest_lock_with_hint + function omp_control_tool(command, modifier) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_control_tool + integer (kind=omp_control_tool_kind), value :: command + integer (kind=omp_control_tool_kind), value :: modifier + end function omp_control_tool + end interface end module omp_lib Index: runtime/src/include/50/ompt.h.var =================================================================== --- runtime/src/include/50/ompt.h.var +++ runtime/src/include/50/ompt.h.var @@ -10,6 +10,7 @@ *****************************************************************************/ #include +#include @@ -17,21 +18,28 @@ * iteration macros *****************************************************************************/ -#define FOREACH_OMPT_INQUIRY_FN(macro) \ - macro (ompt_enumerate_state) \ - \ - macro (ompt_set_callback) \ - macro (ompt_get_callback) \ - \ - macro (ompt_get_idle_frame) \ - macro (ompt_get_task_frame) \ - \ - macro (ompt_get_state) \ - \ - macro (ompt_get_parallel_id) \ - macro (ompt_get_parallel_team_size) \ - macro (ompt_get_task_id) \ - macro (ompt_get_thread_id) +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_states) \ + macro (ompt_enumerate_mutex_impls) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_info) \ + macro (ompt_get_task_info) \ + macro (ompt_get_thread_data) \ + macro (ompt_get_unique_id) \ + \ + macro(ompt_get_num_places) \ + macro(ompt_get_place_proc_ids) \ + macro(ompt_get_place_num) \ + macro(ompt_get_partition_place_nums) \ + macro(ompt_get_proc_id) \ + \ + macro(ompt_get_target_info) \ + macro(ompt_get_num_devices) #define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ macro (ompt_idle) \ @@ -40,141 +48,107 @@ macro (ompt_task_wait) \ macro (ompt_mutex_wait) -#define FOREACH_OMPT_STATE(macro) \ +#define FOREACH_OMP_STATE(macro) \ \ - /* first */ \ - macro (ompt_state_first, 0x71) /* initial enumeration 
state */ \ + /* first available state */ \ + macro (omp_state_undefined, 0x102) /* undefined thread state */ \ \ /* work states (0..15) */ \ - macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ - macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ - macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ + macro (omp_state_work_serial, 0x000) /* working outside parallel */ \ + macro (omp_state_work_parallel, 0x001) /* working within parallel */ \ + macro (omp_state_work_reduction, 0x002) /* performing a reduction */ \ \ - /* idle (16..31) */ \ - macro (ompt_state_idle, 0x10) /* waiting for work */ \ + /* barrier wait states (16..31) */ \ + macro (omp_state_wait_barrier, 0x010) /* waiting at a barrier */ \ + macro (omp_state_wait_barrier_implicit_parallel, 0x011) \ + /* implicit barrier at the end of parallel region */\ + macro (omp_state_wait_barrier_implicit_workshare, 0x012) \ + /* implicit barrier at the end of worksharing */ \ + macro (omp_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \ + macro (omp_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \ \ - /* overhead states (32..63) */ \ - macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ + /* task wait states (32..63) */ \ + macro (omp_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \ + macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \ \ - /* barrier wait states (64..79) */ \ - macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ - macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ - macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ + /* mutex wait states (64..127) */ \ + macro (omp_state_wait_mutex, 0x040) \ + macro (omp_state_wait_lock, 0x041) /* waiting for lock */ \ + macro (omp_state_wait_critical, 0x042) /* waiting for critical */ \ + macro (omp_state_wait_atomic, 0x043) /* waiting for atomic */ \ + macro (omp_state_wait_ordered, 0x044) /* waiting for ordered */ \ \ - /* task wait states (80..95) */ \ - macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ - macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ + /* target wait states (128..255) */ \ + macro (omp_state_wait_target, 0x080) /* waiting for target region */ \ + macro (omp_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \ + macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */ \ \ - /* mutex wait states (96..111) */ \ - macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ - macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ - macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ - macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ - macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ - macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \ + /* misc (256..511) */ \ + macro (omp_state_idle, 0x100) /* waiting for work */ \ + macro (omp_state_overhead, 0x101) /* overhead excluding wait states */ \ \ - /* misc (112..127) */ \ - macro (ompt_state_undefined, 0x70) /* undefined thread state */ + /* implementation-specific states (512..) 
*/ +#define FOREACH_OMPT_MUTEX_IMPL(macro) \ + macro (ompt_mutex_impl_unknown, 0) /* unknown implementation */ \ + macro (ompt_mutex_impl_spin, 1) /* based on spin */ \ + macro (ompt_mutex_impl_queuing, 2) /* based on some fair policy */ \ + macro (ompt_mutex_impl_speculative, 3) /* based on HW-supported speculation */ + #define FOREACH_OMPT_EVENT(macro) \ \ /*--- Mandatory Events ---*/ \ - macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ - macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ - \ - macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ - macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ - \ - macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ - macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ + macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ + macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ \ - macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ + macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ + macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ \ - macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ - \ - /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ - macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ - macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ + macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ + macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ + macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ \ - macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ - macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ + macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ + macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ + macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ \ - macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ - macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ + macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ \ - macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ - macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ + macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ + macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ \ - macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ - macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ - macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \ + /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ \ - macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic 
release */ \ + macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 14) /* sync region wait begin or end*/ \ \ - macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ + macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 15) /* mutex released */ \ \ /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ - macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ - macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ - \ - macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ - macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ - \ - macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ - \ - macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ - macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ - \ - macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ - macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ \ - macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ - macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ + macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 16) /* report task dependences */\ + macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 17) /* report task dependence */\ \ - macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ - macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ + macro (ompt_callback_work, ompt_callback_work_t, 18) /* task at work begin or end*/\ \ - macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ - macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ + macro (ompt_callback_master, ompt_callback_master_t, 19) /* task at master begin or end */\ \ - macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ - macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ + macro (ompt_callback_target_map, ompt_callback_target_map_t, 20) /* target map */ \ \ - macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \ - macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ + macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 21) /* sync region begin or end */ \ \ - macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ - macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ + macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 22) /* lock init */ \ + macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 23) /* lock destroy */ \ \ - macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ - macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ + macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 24) /* mutex acquire */ \ + macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 25) /* mutex acquired */ \ \ - macro 
(ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ + macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 26) /* nest lock */ \ \ - macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ - macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ - macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ - macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ - macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ + macro (ompt_callback_flush, ompt_callback_flush_t, 27) /* after executing flush */ \ \ - macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ - macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ - macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ - macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ - macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ - macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ - \ - macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ - macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ - \ - macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ - macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ - \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ \ - \ - macro (ompt_event_task_dependences, ompt_task_dependences_callback_t, 69) /* report task dependences */\ - macro (ompt_event_task_dependence_pair, ompt_task_pair_callback_t, 70) /* report task dependence pair */ + macro (ompt_callback_cancel, ompt_callback_cancel_t, 28) /*cancel innermost binding region*/\ + macro (ompt_callback_idle, ompt_callback_idle_t, 29) /* begin or end idle state */\ @@ -186,18 +160,20 @@ * identifiers *---------------------*/ -typedef uint64_t ompt_thread_id_t; -#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ +typedef uint64_t ompt_id_t; +#define ompt_id_none 0 -typedef uint64_t ompt_task_id_t; -#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ +typedef union ompt_data_u { + uint64_t value; /* data initialized by runtime to unique id */ + void *ptr; /* pointer under tool control */ +} ompt_data_t; -typedef uint64_t ompt_parallel_id_t; -#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ +static const ompt_data_t ompt_data_none = {0}; typedef uint64_t ompt_wait_id_t; -#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ +static const ompt_wait_id_t ompt_wait_id_none = 0; +typedef void ompt_device_t; /*--------------------- * ompt_frame_t @@ -235,35 +211,44 @@ *---------------------*/ typedef enum { -#define ompt_state_macro(state, code) state = code, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -} ompt_state_t; +#define omp_state_macro(state, code) state = code, + FOREACH_OMP_STATE(omp_state_macro) +#undef omp_state_macro +} omp_state_t; /*--------------------- * runtime events *---------------------*/ -typedef enum { +typedef enum ompt_callbacks_e{ #define ompt_event_macro(event, callback, eventid) event = eventid, FOREACH_OMPT_EVENT(ompt_event_macro) #undef ompt_event_macro -} ompt_event_t; +} ompt_callbacks_t; /*--------------------- * set 
callback results *---------------------*/ -typedef enum { - ompt_set_result_registration_error = 0, - ompt_set_result_event_may_occur_no_callback = 1, - ompt_set_result_event_never_occurs = 2, - ompt_set_result_event_may_occur_callback_some = 3, - ompt_set_result_event_may_occur_callback_always = 4, +typedef enum ompt_set_result_e { + ompt_set_error = 0, + ompt_set_never = 1, + ompt_set_sometimes = 2, + ompt_set_sometimes_paired = 3, + ompt_set_always = 4 } ompt_set_result_t; +/*---------------------- + * mutex implementations + *----------------------*/ +typedef enum ompt_mutex_impl_e { +#define ompt_mutex_impl_macro(impl, code) impl = code, + FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro) +#undef ompt_mutex_impl_macro +} ompt_mutex_impl_t; + /***************************************************************************** * callback signatures @@ -273,14 +258,10 @@ typedef void (*ompt_interface_fn_t)(void); typedef ompt_interface_fn_t (*ompt_function_lookup_t)( - const char * /* entry point to look up */ + const char * /* entry point to look up */ ); /* threads */ -typedef void (*ompt_thread_callback_t) ( - ompt_thread_id_t thread_id /* ID of thread */ -); - typedef enum { ompt_thread_initial = 1, // start the enumeration at 1 ompt_thread_worker = 2, @@ -288,78 +269,262 @@ } ompt_thread_type_t; typedef enum { - ompt_invoker_program = 0, /* program invokes master task */ - ompt_invoker_runtime = 1 /* runtime invokes master task */ + ompt_invoker_program = 1, /* program invokes master task */ + ompt_invoker_runtime = 2 /* runtime invokes master task */ } ompt_invoker_t; -typedef void (*ompt_thread_type_callback_t) ( - ompt_thread_type_t thread_type, /* type of thread */ - ompt_thread_id_t thread_id /* ID of thread */ +typedef void (*ompt_callback_thread_begin_t) ( + ompt_thread_type_t thread_type, /* type of thread */ + ompt_data_t *thread_data /* data of thread */ +); + +typedef void (*ompt_callback_thread_end_t) ( + ompt_data_t *thread_data /* data of thread */ ); typedef void (*ompt_wait_callback_t) ( - ompt_wait_id_t wait_id /* wait id */ + ompt_wait_id_t wait_id /* wait data */ ); /* parallel and workshares */ -typedef void (*ompt_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id /* id of task */ +typedef enum ompt_scope_endpoint_e { + ompt_scope_begin = 1, + ompt_scope_end = 2 +} ompt_scope_endpoint_t; + + +/* implicit task */ +typedef void (*ompt_callback_implicit_task_t) ( + ompt_scope_endpoint_t endpoint, /* endpoint of implicit task */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of implicit task */ + unsigned int team_size, /* team size */ + unsigned int thread_num /* thread number of calling thread */ ); -typedef void (*ompt_new_workshare_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t parent_task_id, /* id of parent task */ - void *workshare_function /* pointer to outlined function */ +typedef void (*ompt_callback_parallel_begin_t) ( + ompt_data_t *parent_task_data, /* data of parent task */ + const ompt_frame_t *parent_frame, /* frame data of parent task */ + ompt_data_t *parallel_data, /* data of parallel region */ + unsigned int requested_team_size, /* requested number of threads in team */ + ompt_invoker_t invoker, /* invoker of master task */ + const void *codeptr_ra /* return address of runtime call */ ); -typedef void (*ompt_new_parallel_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t 
*parent_task_frame, /* frame data of parent task */ - ompt_parallel_id_t parallel_id, /* id of parallel region */ - uint32_t requested_team_size, /* number of threads in team */ - void *parallel_function, /* pointer to outlined function */ - ompt_invoker_t invoker /* who invokes master task? */ +typedef void (*ompt_callback_parallel_end_t) ( + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + ompt_invoker_t invoker, /* invoker of master task */ + const void *codeptr_ra /* return address of runtime call */ ); -typedef void (*ompt_end_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id, /* id of task */ - ompt_invoker_t invoker /* who invokes master task? */ +/* tasks */ +typedef enum ompt_task_type_e { + ompt_task_initial = 0x1, + ompt_task_implicit = 0x2, + ompt_task_explicit = 0x4, + ompt_task_target = 0x8, + ompt_task_undeferred = 0x8000000, + ompt_task_untied = 0x10000000, + ompt_task_final = 0x20000000, + ompt_task_mergeable = 0x40000000, + ompt_task_merged = 0x80000000 +} ompt_task_type_t; + +typedef enum ompt_task_status_e { + ompt_task_complete = 1, + ompt_task_yield = 2, + ompt_task_cancel = 3, + ompt_task_others = 4 +} ompt_task_status_t; + +typedef void (*ompt_callback_task_schedule_t) ( + ompt_data_t *prior_task_data, /* data of prior task */ + ompt_task_status_t prior_task_status, /* status of prior task */ + ompt_data_t *next_task_data /* data of next task */ ); -/* tasks */ -typedef void (*ompt_task_callback_t) ( - ompt_task_id_t task_id /* id of task */ +typedef void (*ompt_callback_task_create_t) ( + ompt_data_t *parent_task_data, /* data of parent task */ + const ompt_frame_t *parent_frame, /* frame data for parent task */ + ompt_data_t *new_task_data, /* data of created task */ + int type, /* type of created task */ + int has_dependences, /* created task has dependences */ + const void *codeptr_ra /* return address of runtime call */ ); -typedef void (*ompt_task_pair_callback_t) ( - ompt_task_id_t first_task_id, - ompt_task_id_t second_task_id +/* task dependences */ +typedef void (*ompt_callback_task_dependences_t) ( + ompt_data_t *task_data, /* data of task */ + const ompt_task_dependence_t *deps, /* dependences of task */ + int ndeps /* dependences count of task */ ); -typedef void (*ompt_new_task_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data for parent task */ - ompt_task_id_t new_task_id, /* id of created task */ - void *task_function /* pointer to outlined function */ +typedef void (*ompt_callback_task_dependence_t) ( + ompt_data_t *src_task_data, /* data of source task */ + ompt_data_t *sink_task_data /* data of sink task */ ); -/* task dependences */ -typedef void (*ompt_task_dependences_callback_t) ( - ompt_task_id_t task_id, /* ID of task with dependences */ - const ompt_task_dependence_t *deps,/* vector of task dependences */ - int ndeps /* number of dependences */ +/* target and device */ +typedef enum ompt_target_type_e { + ompt_target = 1, + ompt_target_enter_data = 2, + ompt_target_exit_data = 3, + ompt_target_update = 4 +} ompt_target_type_t; + +typedef void (*ompt_callback_target_t) ( + ompt_target_type_t kind, + ompt_scope_endpoint_t endpoint, + uint64_t device_num, + ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra ); -/* program */ -typedef void (*ompt_control_callback_t) ( - uint64_t command, /* command of control call */ - uint64_t modifier /* 
modifier of control call */ +typedef enum ompt_target_data_op_e { + ompt_target_data_alloc = 1, + ompt_target_data_transfer_to_dev = 2, + ompt_target_data_transfer_from_dev = 3, + ompt_target_data_delete = 4 +} ompt_target_data_op_t; + +typedef void (*ompt_callback_target_data_op_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id, + ompt_target_data_op_t optype, + void *host_addr, + void *device_addr, + size_t bytes ); -typedef void (*ompt_callback_t)(void); +typedef void (*ompt_callback_target_submit_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id +); +typedef void (*ompt_callback_target_map_t) ( + ompt_id_t target_id, + unsigned int nitems, + void **host_addr, + void **device_addr, + size_t *bytes, + unsigned int *mapping_flags +); + +typedef void (*ompt_callback_device_initialize_t) ( + uint64_t device_num, + const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation +); + +typedef void (*ompt_callback_device_finalize_t) ( + uint64_t device_num +); + +/* control_tool */ +typedef int (*ompt_callback_control_tool_t) ( + uint64_t command, /* command of control call */ + uint64_t modifier, /* modifier of control call */ + void *arg, /* argument of control call */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef enum ompt_mutex_kind_e { + ompt_mutex = 0x10, + ompt_mutex_lock = 0x11, + ompt_mutex_nest_lock = 0x12, + ompt_mutex_critical = 0x13, + ompt_mutex_atomic = 0x14, + ompt_mutex_ordered = 0x20 +} ompt_mutex_kind_t; + +typedef void (*ompt_callback_mutex_acquire_t) ( + ompt_mutex_kind_t kind, /* mutex kind */ + unsigned int hint, /* mutex hint */ + unsigned int impl, /* mutex implementation */ + ompt_wait_id_t wait_id, /* id of object being awaited */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_mutex_t) ( + ompt_mutex_kind_t kind, /* mutex kind */ + ompt_wait_id_t wait_id, /* id of object being awaited */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_nest_lock_t) ( + ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */ + ompt_wait_id_t wait_id, /* id of object being awaited */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_master_t) ( + ompt_scope_endpoint_t endpoint, /* endpoint of master region */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_idle_t) ( + ompt_scope_endpoint_t endpoint /* endpoint of idle time */ +); + +typedef enum ompt_work_type_e { + ompt_work_loop = 1, + ompt_work_sections = 2, + ompt_work_single_executor = 3, + ompt_work_single_other = 4, + ompt_work_workshare = 5, + ompt_work_distribute = 6, + ompt_work_taskloop = 7 +} ompt_work_type_t; + +typedef void (*ompt_callback_work_t) ( + ompt_work_type_t wstype, /* type of work region */ + ompt_scope_endpoint_t endpoint, /* endpoint of work region */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + uint64_t count, /* quantity of work */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef enum ompt_sync_region_kind_e { + ompt_sync_region_barrier = 1, + ompt_sync_region_taskwait = 2, + ompt_sync_region_taskgroup = 3 +} ompt_sync_region_kind_t; + +typedef void (*ompt_callback_sync_region_t) ( + ompt_sync_region_kind_t kind, /* kind of sync region */ + 
ompt_scope_endpoint_t endpoint, /* endpoint of sync region */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef enum ompt_cancel_flag_e { + ompt_cancel_parallel = 0x1, + ompt_cancel_sections = 0x2, + ompt_cancel_do = 0x4, + ompt_cancel_taskgroup = 0x8, + ompt_cancel_activated = 0x10, + ompt_cancel_detected = 0x20, + ompt_cancel_discarded_task = 0x40 +} ompt_cancel_flag_t; + +typedef void (*ompt_callback_cancel_t) ( + ompt_data_t *task_data, /* data of task */ + int flags, /* cancel flags */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_flush_t) ( + ompt_data_t *thread_data, /* data of thread */ + const void *codeptr_ra /* return address of runtime call */ +); /**************************************************************************** * ompt API @@ -381,33 +546,48 @@ ***************************************************************************/ /* state */ -OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( - ompt_wait_id_t *ompt_wait_id +OMPT_API_FUNCTION(omp_state_t, ompt_get_state, ( + ompt_wait_id_t *wait_id )); /* thread */ -OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); - -OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); +OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void)); /* parallel region */ -OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( - int ancestor_level +OMPT_API_FUNCTION(int, ompt_get_parallel_info, ( + int ancestor_level, + ompt_data_t **parallel_data, + int *team_size )); -OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( - int ancestor_level +/* task */ +OMPT_API_FUNCTION(int, ompt_get_task_info, ( + int ancestor_level, + int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num )); -/* task */ -OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( - int depth +/* places */ +OMPT_API_FUNCTION(int, ompt_get_num_places, (void)); + +OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, ( + int place_num, + int ids_size, + int *ids )); -OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( - int depth +OMPT_API_FUNCTION(int, ompt_get_place_num, (void)); + +OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, ( + int place_nums_size, + int *place_nums )); +/* proc_id */ +OMPT_API_FUNCTION(int, ompt_get_proc_id, (void)); /**************************************************************************** @@ -445,25 +625,35 @@ * INITIALIZATION FUNCTIONS ***************************************************************************/ -OMPT_API_FUNCTION(void, ompt_initialize, ( +typedef struct ompt_fns_t ompt_fns_t; + +OMPT_API_FUNCTION(int, ompt_initialize, ( ompt_function_lookup_t ompt_fn_lookup, - const char *runtime_version, - unsigned int ompt_version + ompt_fns_t *fns )); +OMPT_API_FUNCTION(void, ompt_finalize, ( + ompt_fns_t *fns +)); + +struct ompt_fns_t { + ompt_initialize_t initialize; + ompt_finalize_t finalize; +}; /* initialization interface to be defined by tool */ -ompt_initialize_t ompt_tool(void); +#ifdef _WIN32 +__declspec(dllexport) +#endif +ompt_fns_t * ompt_start_tool( + unsigned int omp_version, + const char * runtime_version +); -typedef enum opt_init_mode_e { - ompt_init_mode_never = 0, - ompt_init_mode_false = 1, - ompt_init_mode_true = 2, - ompt_init_mode_always = 3 -} ompt_init_mode_t; +typedef void (*ompt_callback_t)(void); OMPT_API_FUNCTION(int, ompt_set_callback, ( - 
ompt_event_t event, + ompt_callbacks_t which, ompt_callback_t callback )); @@ -477,7 +667,7 @@ OMPT_API_FUNCTION(int, ompt_get_callback, ( - ompt_event_t event, + ompt_callbacks_t which, ompt_callback_t *callback )); @@ -487,29 +677,37 @@ * MISCELLANEOUS FUNCTIONS ***************************************************************************/ -/* control */ -// FIXME: remove workaround for clang -#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp declare target -#endif -void ompt_control( - uint64_t command, - uint64_t modifier -); -#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp end declare target -#endif - /* state enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_state, ( +OMPT_API_FUNCTION(int, ompt_enumerate_states, ( int current_state, int *next_state, const char **next_state_name )); +/* mutex implementation enumeration */ +OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, ( + int current_impl, + int *next_impl, + const char **next_impl_name +)); + +/* get_unique_id */ +OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void)); + #ifdef __cplusplus }; #endif -#endif +/**************************************************************************** + * TARGET + ***************************************************************************/ + + OMPT_API_FUNCTION(int, ompt_get_target_info, ( + uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id +)); + + OMPT_API_FUNCTION(int, ompt_get_num_devices, (void)); +#endif /* __OMPT__ */ Index: runtime/src/kmp.h =================================================================== --- runtime/src/kmp.h +++ runtime/src/kmp.h @@ -200,6 +200,10 @@ #define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140 #define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0 +#define KMP_IDENT_WORK_LOOP 0x200 // static loop +#define KMP_IDENT_WORK_SECTIONS 0x400 // sections +#define KMP_IDENT_WORK_DISTRIBUTE 0x800 // distribute + /*! * The ident structure that describes a source location. 
*/ @@ -798,6 +802,10 @@ extern int __kmp_hws_requested; extern int __kmp_hws_abs_flag; // absolute or per-item number requested +#if OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT +extern char const *__kmp_tool_libraries; +#endif // OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT + /* ------------------------------------------------------------------------ */ #define KMP_PAD(type, sz) \ @@ -3314,7 +3322,7 @@ extern kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_data_t ompt_parallel_data, #endif kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); @@ -3322,7 +3330,7 @@ extern kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_id_t ompt_parallel_id, #endif kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); @@ -3362,9 +3370,6 @@ }; extern int __kmp_fork_call(ident_t *loc, int gtid, enum fork_context_e fork_context, kmp_int32 argc, -#if OMPT_SUPPORT - void *unwrapped_task, -#endif microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX Index: runtime/src/kmp_atomic.h =================================================================== --- runtime/src/kmp_atomic.h +++ runtime/src/kmp_atomic.h @@ -361,19 +361,20 @@ static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck, kmp_int32 gtid) { -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_wait_atomic)((ompt_wait_id_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_atomic, 0, ompt_mutex_impl_queuing, (ompt_wait_id_t)lck, + OMPT_GET_RETURN_ADDRESS(0)); } #endif __kmp_acquire_queuing_lock(lck, gtid); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)( - (ompt_wait_id_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); } #endif } @@ -386,10 +387,10 @@ static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck, kmp_int32 gtid) { __kmp_release_queuing_lock(lck, gtid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_release_atomic)( - (ompt_wait_id_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); } #endif } Index: runtime/src/kmp_barrier.cpp =================================================================== --- runtime/src/kmp_barrier.cpp +++ runtime/src/kmp_barrier.cpp @@ -16,6 +16,9 @@ #include "kmp_itt.h" #include "kmp_os.h" #include "kmp_stats.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif #if KMP_MIC #include @@ -1224,8 +1227,9 @@ int status = 0; ident_t *loc = __kmp_threads[gtid]->th.th_ident; #if OMPT_SUPPORT - ompt_task_id_t my_task_id; - ompt_parallel_id_t 
my_parallel_id; + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + void *return_address; #endif KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid, @@ -1233,28 +1237,26 @@ ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - -#if OMPT_TRACE - if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_end)( - my_parallel_id, my_task_id); - } + if (ompt_enabled.enabled) { +#if OMPT_OPTIONAL + my_task_data = OMPT_CUR_TASK_DATA(this_thr); + my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); + return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); } -#endif - if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id, - my_task_id); + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); } #endif // It is OK to report the barrier state after the barrier begin callback. // According to the OMPT specification, a compliant implementation may // even delay reporting this state until the barrier begins to wait. - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + this_thr->th.ompt_thread_info.state = omp_state_wait_barrier; } #endif @@ -1489,14 +1491,20 @@ __kmp_tid_from_gtid(gtid), status)); #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id, - my_task_id); + if (ompt_enabled.enabled) { +#if OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, my_parallel_data, + my_task_data, return_address); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, my_parallel_data, + my_task_data, return_address); } #endif - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + this_thr->th.ompt_thread_info.state = omp_state_work_parallel; } #endif ANNOTATE_BARRIER_END(&team->t.t_bar); @@ -1593,14 +1601,31 @@ ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); #if OMPT_SUPPORT -#if OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + if (ompt_enabled.enabled) { +#if OMPT_OPTIONAL + void *codeptr = NULL; + int ds_tid = this_thr->th.th_info.ds.ds_tid; + if (KMP_MASTER_TID(ds_tid) && + (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || + ompt_callbacks.ompt_callback(ompt_callback_sync_region))) + codeptr = team->t.ompt_team_info.master_return_address; + my_task_data = OMPT_CUR_TASK_DATA(this_thr); + my_parallel_data = 
OMPT_CUR_TEAM_DATA(this_thr); + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, codeptr); + } #endif - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit; + } #endif if (__kmp_tasking_mode == tskm_extra_barrier) { @@ -1758,20 +1783,6 @@ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); -#if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } -#endif - - // return to default state - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif ANNOTATE_BARRIER_END(&team->t.t_bar); } @@ -1869,6 +1880,39 @@ } } +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + if (this_thr->th.ompt_thread_info.state == + omp_state_wait_barrier_implicit) { + int ds_tid = this_thr->th.th_info.ds.ds_tid; + ompt_data_t *tId = (team) ? OMPT_CUR_TASK_DATA(this_thr) + : &(this_thr->th.ompt_thread_info.task_data); + this_thr->th.ompt_thread_info.state = omp_state_overhead; +#if OMPT_OPTIONAL + void *codeptr = NULL; + if (KMP_MASTER_TID(ds_tid) && + (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || + ompt_callbacks.ompt_callback(ompt_callback_sync_region))) + codeptr = team->t.ompt_team_info.master_return_address; + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } +#endif + if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, tId, 0, ds_tid); + } + // return to idle state + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } + } +#endif + // Early exit for reaping threads releasing forkjoin barrier if (TCR_4(__kmp_global.g.g_done)) { this_thr->th.th_task_team = NULL; Index: runtime/src/kmp_cancel.cpp =================================================================== --- runtime/src/kmp_cancel.cpp +++ runtime/src/kmp_cancel.cpp @@ -12,6 +12,9 @@ #include "kmp_i18n.h" #include "kmp_io.h" #include "kmp_str.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif #if OMP_40_ENABLED @@ -51,11 +54,28 @@ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32( &(this_team->t.t_cancel_request), cancel_noreq, cncl_kind); if (old == cancel_noreq || old == cncl_kind) { - // printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", - // this_team->t.t_cancel_request, - // &(this_team->t.t_cancel_request)); - // we do not have a cancellation request in this team or we do have - // one that matches the current request -> cancel +// printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", +// this_team->t.t_cancel_request, +// &(this_team->t.t_cancel_request)); +// we do not have a cancellation request in this 
team or we do have +// one that matches the current request -> cancel +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_cancel_flag_t type = ompt_cancel_parallel; + if (cncl_kind == cancel_parallel) + type = ompt_cancel_parallel; + else if (cncl_kind == cancel_loop) + type = ompt_cancel_do; + else if (cncl_kind == cancel_sections) + type = ompt_cancel_sections; + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, type | ompt_cancel_activated, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return 1 /* true */; } break; @@ -75,8 +95,18 @@ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32( &(taskgroup->cancel_request), cancel_noreq, cncl_kind); if (old == cancel_noreq || old == cncl_kind) { - // we do not have a cancellation request in this taskgroup or we do - // have one that matches the current request -> cancel +// we do not have a cancellation request in this taskgroup or we do +// have one that matches the current request -> cancel +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, ompt_cancel_taskgroup | ompt_cancel_activated, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return 1 /* true */; } } else { @@ -134,8 +164,25 @@ KMP_DEBUG_ASSERT(this_team); if (this_team->t.t_cancel_request) { if (cncl_kind == this_team->t.t_cancel_request) { - // the request in the team structure matches the type of - // cancellation point so we can cancel +// the request in the team structure matches the type of +// cancellation point so we can cancel +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_cancel_flag_t type = ompt_cancel_parallel; + if (cncl_kind == cancel_parallel) + type = ompt_cancel_parallel; + else if (cncl_kind == cancel_loop) + type = ompt_cancel_do; + else if (cncl_kind == cancel_sections) + type = ompt_cancel_sections; + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, type | ompt_cancel_detected, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return 1 /* true */; } KMP_ASSERT(0 /* false */); @@ -158,7 +205,18 @@ taskgroup = task->td_taskgroup; if (taskgroup) { - // return the current status of cancellation for the taskgroup +// return the current status of cancellation for the taskgroup +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel && + !!taskgroup->cancel_request) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, ompt_cancel_taskgroup | ompt_cancel_detected, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return !!taskgroup->cancel_request; } else { // if a cancellation point is encountered by a task that does not Index: runtime/src/kmp_config.h.cmake =================================================================== --- runtime/src/kmp_config.h.cmake +++ runtime/src/kmp_config.h.cmake @@ -45,10 +45,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMP_OMPT_BLAME -#define OMPT_BLAME LIBOMP_OMPT_BLAME -#cmakedefine01 LIBOMP_OMPT_TRACE -#define OMPT_TRACE LIBOMP_OMPT_TRACE +#cmakedefine01 
LIBOMP_OMPT_OPTIONAL +#define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS #define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS #define KMP_DEBUG_ADAPTIVE_LOCKS 0 Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ runtime/src/kmp_csupport.cpp @@ -278,7 +278,7 @@ #if OMPT_SUPPORT ompt_frame_t *ompt_frame; - if (ompt_enabled) { + if (ompt_enabled.enabled) { kmp_info_t *master_th = __kmp_threads[gtid]; kmp_team_t *parent_team = master_th->th.th_team; ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; @@ -289,7 +289,8 @@ ompt_frame = &( parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); } - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -297,9 +298,6 @@ SSC_MARK_FORKING(); #endif __kmp_fork_call(loc, gtid, fork_context_intel, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task -#endif VOLATILE_CAST(microtask_t) microtask, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_task_func, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ @@ -371,11 +369,11 @@ #if OMPT_SUPPORT kmp_team_t *parent_team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(gtid); - if (ompt_enabled) { + if (ompt_enabled.enabled) { parent_team->t.t_implicit_task_taskdata[tid] - .ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(1); + .ompt_task_info.frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif // check if __kmpc_push_num_teams called, set default number of teams @@ -388,9 +386,6 @@ KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); __kmp_fork_call(loc, gtid, fork_context_intel, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task -#endif VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, @@ -433,9 +428,12 @@ when the condition is false. */ void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { - // The implementation is now in kmp_runtime.cpp so that it can share static - // functions with kmp_fork_call since the tasks to be done are similar in - // each case. +// The implementation is now in kmp_runtime.cpp so that it can share static +// functions with kmp_fork_call since the tasks to be done are similar in +// each case. 
+#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif __kmp_serialized_parallel(loc, global_tid); } @@ -482,6 +480,30 @@ KMP_DEBUG_ASSERT(serial_team->t.t_threads); KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); +#if OMPT_SUPPORT + if (ompt_enabled.enabled && + this_thr->th.ompt_thread_info.state != omp_state_overhead) { + OMPT_CUR_TASK_INFO(this_thr)->frame.exit_runtime_frame = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, + __kmp_tid_from_gtid(global_tid)); + } + + // reset clear the task id only after unlinking the task + ompt_data_t *parent_task_data; + __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); + + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, + ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); + } + __ompt_lw_taskteam_unlink(this_thr); + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } +#endif + /* If necessary, pop the internal control stack values and replace the team * values */ top = serial_team->t.t_control_stack_top; @@ -554,6 +576,12 @@ if (__kmp_env_consistency_check) __kmp_pop_parallel(global_tid, NULL); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + this_thr->th.ompt_thread_info.state = + ((this_thr->th.th_team_serialized) ? omp_state_work_serial + : omp_state_work_parallel); +#endif } /*! @@ -617,6 +645,13 @@ #else #error Unknown or unsupported architecture #endif + +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_flush) { + ompt_callbacks.ompt_callback(ompt_callback_flush)( + __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); + } +#endif } /* -------------------------------------------------------------------------- */ @@ -642,12 +677,13 @@ __kmp_check_barrier(global_tid, ct_barrier, loc); } -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT ompt_frame_t *ompt_frame; - if (ompt_enabled) { - ompt_frame = __ompt_get_task_frame_internal(0); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->reenter_runtime_frame == NULL) - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif __kmp_threads[global_tid]->th.th_ident = loc; @@ -659,8 +695,8 @@ // 4) no sync is required __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { ompt_frame->reenter_runtime_frame = NULL; } #endif @@ -687,16 +723,17 @@ status = 1; } -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL if (status) { - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_begin)) { + if (ompt_enabled.ompt_callback_master) { kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(global_tid); - ompt_callbacks.ompt_callback(ompt_event_master_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + ompt_callbacks.ompt_callback(ompt_callback_master)( + ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 
OMPT_GET_RETURN_ADDRESS(0)); } } #endif @@ -732,14 +769,15 @@ KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); KMP_POP_PARTITIONED_TIMER(); -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_end)) { + if (ompt_enabled.ompt_callback_master) { int tid = __kmp_tid_from_gtid(global_tid); - ompt_callbacks.ompt_callback(ompt_event_master_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + ompt_callbacks.ompt_callback(ompt_callback_master)( + ompt_scope_end, &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + OMPT_GET_RETURN_ADDRESS(0)); } #endif @@ -776,16 +814,24 @@ th = __kmp_threads[gtid]; -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + kmp_team_t *team; + ompt_wait_id_t lck; + void *codeptr_ra; + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + team = __kmp_team_from_gtid(gtid); + lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value; /* OMPT state update */ - th->th.ompt_thread_info.wait_id = (uint64_t)loc; - th->th.ompt_thread_info.state = ompt_state_wait_ordered; + th->th.ompt_thread_info.wait_id = lck; + th->th.ompt_thread_info.state = omp_state_wait_ordered; /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_wait_ordered)( - th->th.ompt_thread_info.wait_id); + codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_ordered, omp_lock_hint_none, ompt_mutex_impl_spin, + (ompt_wait_id_t)lck, codeptr_ra); } } #endif @@ -795,16 +841,16 @@ else __kmp_parallel_deo(>id, &cid, loc); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { /* OMPT state update */ - th->th.ompt_thread_info.state = ompt_state_work_parallel; + th->th.ompt_thread_info.state = omp_state_work_parallel; th->th.ompt_thread_info.wait_id = 0; /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)( - th->th.ompt_thread_info.wait_id); + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra); } } #endif @@ -839,11 +885,13 @@ else __kmp_parallel_dxo(>id, &cid, loc); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - th->th.ompt_thread_info.wait_id); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_ordered, + (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value, + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif } @@ -1063,11 +1111,18 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { #if KMP_USE_DYNAMIC_LOCK +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif // OMPT_SUPPORT __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); #else KMP_COUNT_BLOCK(OMP_CRITICAL); 
KMP_TIME_PARTITIONED_BLOCK( OMP_critical_wait); /* Time spent waiting to enter the critical section */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + omp_state_t prev_state = omp_state_undefined; + ompt_thread_info_t ti; +#endif kmp_user_lock_p lck; KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); @@ -1101,6 +1156,25 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); + void *codeptr_ra = NULL; + if (ompt_enabled.enabled) { + ti = __kmp_threads[global_tid]->th.ompt_thread_info; + /* OMPT state update */ + prev_state = ti.state; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = omp_state_wait_critical; + + /* OMPT event callback */ + codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)crit, codeptr_ra); + } + } +#endif // Value of 'crit' should be good for using as a critical_id of the critical // section directive. __kmp_acquire_user_lock_with_checks(lck, global_tid); @@ -1108,6 +1182,19 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquired(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + /* OMPT state update */ + ti.state = prev_state; + ti.wait_id = 0; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra); + } + } +#endif KMP_START_EXPLICIT_TIMER(OMP_critical); KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); @@ -1160,6 +1247,76 @@ return __kmp_user_lock_seq; } +#if OMPT_SUPPORT && OMPT_OPTIONAL +static ompt_mutex_impl_t +__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { + if (user_lock) { + switch (KMP_EXTRACT_D_TAG(user_lock)) { + case 0: + break; +#if KMP_USE_FUTEX + case locktag_futex: + return ompt_mutex_impl_queuing; +#endif + case locktag_tas: + return ompt_mutex_impl_spin; +#if KMP_USE_TSX + case locktag_hle: + return ompt_mutex_impl_speculative; +#endif + default: + return ompt_mutex_impl_unknown; + } + ilock = KMP_LOOKUP_I_LOCK(user_lock); + } + KMP_ASSERT(ilock); + switch (ilock->type) { +#if KMP_USE_TSX + case locktag_adaptive: + case locktag_rtm: + return ompt_mutex_impl_speculative; +#endif + case locktag_nested_tas: + return ompt_mutex_impl_spin; +#if KMP_USE_FUTEX + case locktag_nested_futex: +#endif + case locktag_ticket: + case locktag_queuing: + case locktag_drdpa: + case locktag_nested_ticket: + case locktag_nested_queuing: + case locktag_nested_drdpa: + return ompt_mutex_impl_queuing; + default: + return ompt_mutex_impl_unknown; + } +} + +// For locks without dynamic binding +static ompt_mutex_impl_t __ompt_get_mutex_impl_type() { + switch (__kmp_user_lock_kind) { + case lk_tas: + return ompt_mutex_impl_spin; +#if KMP_USE_FUTEX + case lk_futex: +#endif + case lk_ticket: + case lk_queuing: + case lk_drdpa: + return ompt_mutex_impl_queuing; +#if KMP_USE_TSX + case lk_hle: + case lk_rtm: + case lk_adaptive: + return ompt_mutex_impl_speculative; +#endif + default: + return ompt_mutex_impl_unknown; + } +} +#endif + /*! @ingroup WORK_SHARING @param loc source location information. 
@@ -1177,6 +1334,14 @@ kmp_critical_name *crit, uintptr_t hint) { KMP_COUNT_BLOCK(OMP_CRITICAL); kmp_user_lock_p lck; +#if OMPT_SUPPORT && OMPT_OPTIONAL + omp_state_t prev_state = omp_state_undefined; + ompt_thread_info_t ti; + // This is the case, if called from __kmpc_critical: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); +#endif KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); @@ -1203,6 +1368,22 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquiring(lck); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ti = __kmp_threads[global_tid]->th.ompt_thread_info; + /* OMPT state update */ + prev_state = ti.state; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = omp_state_wait_critical; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_critical, (unsigned int)hint, + __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr); + } + } +#endif #if KMP_USE_INLINED_TAS if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { KMP_ACQUIRE_TAS_LOCK(lck, global_tid); @@ -1225,12 +1406,41 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquiring(lck); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ti = __kmp_threads[global_tid]->th.ompt_thread_info; + /* OMPT state update */ + prev_state = ti.state; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = omp_state_wait_critical; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_critical, (unsigned int)hint, + __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr); + } + } +#endif KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); } #if USE_ITT_BUILD __kmp_itt_critical_acquired(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + /* OMPT state update */ + ti.state = prev_state; + ti.wait_id = 0; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr); + } + } +#endif KMP_PUSH_PARTITIONED_TIMER(OMP_critical); KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); @@ -1317,14 +1527,18 @@ // section directive. 
__kmp_release_user_lock_with_checks(lck, global_tid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_critical)) { - ompt_callbacks.ompt_callback(ompt_event_release_critical)((uint64_t)lck); +#endif // KMP_USE_DYNAMIC_LOCK + +#if OMPT_SUPPORT && OMPT_OPTIONAL + /* OMPT release event triggers after lock is released; place here to trigger + * for all #if branches */ + OMPT_STORE_RETURN_ADDRESS(global_tid); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0)); } #endif -#endif // KMP_USE_DYNAMIC_LOCK KMP_POP_PARTITIONED_TIMER(); KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); } @@ -1349,10 +1563,24 @@ if (__kmp_env_consistency_check) __kmp_check_barrier(global_tid, ct_barrier, loc); +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif return (status != 0) ? 0 : 1; } @@ -1397,10 +1625,24 @@ __kmp_check_barrier(global_tid, ct_barrier, loc); } +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif ret = __kmpc_master(loc, global_tid); @@ -1443,26 +1685,35 @@ KMP_PUSH_PARTITIONED_TIMER(OMP_single); } -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(global_tid); - if (ompt_enabled) { + if (ompt_enabled.enabled) { if (rc) { - if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id, - team->t.ompt_team_info.microtask); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_executor, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); } } else { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + 
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_end, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); } - this_thr->th.ompt_thread_info.state = ompt_state_wait_single; + // this_thr->th.ompt_thread_info.state = + // omp_state_work_parallel; } } #endif @@ -1483,16 +1734,17 @@ __kmp_exit_single(global_tid); KMP_POP_PARTITIONED_TIMER(); -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(global_tid); - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_executor, ompt_scope_end, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, + OMPT_GET_RETURN_ADDRESS(0)); } #endif } @@ -1507,12 +1759,28 @@ void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_work_type_t ompt_work_type; ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_loop_end)(team_info->parallel_id, - task_info->task_id); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + // Determine workshare type + if (loc != NULL) { + if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { + ompt_work_type = ompt_work_loop; + } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { + ompt_work_type = ompt_work_sections; + } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { + ompt_work_type = ompt_work_distribute; + } else { + KMP_ASSERT2(0, + "__kmpc_for_static_fini: can't determine workshare type"); + } + KMP_DEBUG_ASSERT(ompt_work_type); + } + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_end, &(team_info->parallel_data), + &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); } #endif @@ -1709,6 +1977,15 @@ if (didit) *data_ptr = cpy_data; +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif /* This barrier is not a barrier region boundary */ #if USE_ITT_NOTIFY __kmp_threads[gtid]->th.th_ident = loc; @@ -1721,11 +1998,21 @@ // Consider next barrier a user-visible barrier for barrier region boundaries // Nesting checks are already handled by the single construct checks +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 
// tasks can overwrite the location) #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } /* -------------------------------------------------------------------------- */ @@ -1812,6 +2099,19 @@ } __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_lock, (omp_lock_hint_t)hint, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif } /* initialize the lock with a hint */ @@ -1823,6 +2123,19 @@ } __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_nest_lock, (omp_lock_hint_t)hint, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif } #endif // KMP_USE_DYNAMIC_LOCK @@ -1837,6 +2150,19 @@ } __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif + #else // KMP_USE_DYNAMIC_LOCK static char const *const func = "omp_init_lock"; @@ -1867,9 +2193,15 @@ INIT_LOCK(lck); __kmp_set_user_lock_location(lck, loc); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -1890,6 +2222,19 @@ } __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif + #else // KMP_USE_DYNAMIC_LOCK static char const *const func = "omp_init_nest_lock"; @@ -1923,9 +2268,15 @@ INIT_NESTED_LOCK(lck); __kmp_set_user_lock_location(lck, loc); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && 
ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -1948,6 +2299,22 @@ } __kmp_itt_lock_destroyed(lck); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + kmp_user_lock_p lck; + if (KMP_EXTRACT_D_TAG(user_lock) == 0) { + lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; + } else { + lck = (kmp_user_lock_p)user_lock; + } + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); #else kmp_user_lock_p lck; @@ -1966,9 +2333,14 @@ lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); } -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2001,6 +2373,16 @@ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); __kmp_itt_lock_destroyed(ilk->lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); #else // KMP_USE_DYNAMIC_LOCK @@ -2023,10 +2405,14 @@ lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); } -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2063,6 +2449,18 @@ (kmp_user_lock_p) user_lock); // itt function will get to the right lock object. 
#endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif #if KMP_USE_INLINED_TAS if (tag == locktag_tas && !__kmp_env_consistency_check) { KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); @@ -2078,6 +2476,12 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif #else // KMP_USE_DYNAMIC_LOCK @@ -2100,6 +2504,17 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)lck, codeptr); + } +#endif ACQUIRE_LOCK(lck, gtid); @@ -2107,9 +2522,10 @@ __kmp_itt_lock_acquired(lck); #endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -2122,14 +2538,41 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); #endif - KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } + } +#endif + int acquire_status = + KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); #if USE_ITT_BUILD __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); #endif -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - // missing support here: need to know whether acquired first or not +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } + } else { + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); + } + } } #endif @@ -2156,6 +2599,19 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // 
This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); + } + } +#endif ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); @@ -2163,16 +2619,20 @@ __kmp_itt_lock_acquired(lck); #endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { - if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)( - (uint64_t)lck); + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); + } } else { - if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)( - (uint64_t)lck); + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); + } } } #endif @@ -2200,6 +2660,17 @@ __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); } +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif + #else // KMP_USE_DYNAMIC_LOCK kmp_user_lock_p lck; @@ -2217,6 +2688,18 @@ #endif /* USE_ITT_BUILD */ TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); KMP_MB(); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); + } +#endif + return; #else lck = (kmp_user_lock_p)user_lock; @@ -2238,9 +2721,14 @@ RELEASE_LOCK(lck, gtid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_lock)) { - ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -2254,7 +2742,28 @@ #if USE_ITT_BUILD __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); #endif - KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); + int release_status = + KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = 
OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (release_status == KMP_LOCK_RELEASED) { + if (ompt_enabled.ompt_callback_mutex_released) { + // release_lock_last + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } + } else if (ompt_enabled.ompt_callback_nest_lock) { + // release_lock_prev + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr); + } + } +#endif #else // KMP_USE_DYNAMIC_LOCK @@ -2272,10 +2781,39 @@ #if USE_ITT_BUILD __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); #endif /* USE_ITT_BUILD */ + +#if OMPT_SUPPORT && OMPT_OPTIONAL + int release_status = KMP_LOCK_STILL_HELD; +#endif + if (--(tl->lk.depth_locked) == 0) { TCW_4(tl->lk.poll, 0); +#if OMPT_SUPPORT && OMPT_OPTIONAL + release_status = KMP_LOCK_RELEASED; +#endif } KMP_MB(); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (release_status == KMP_LOCK_RELEASED) { + if (ompt_enabled.ompt_callback_mutex_released) { + // release_lock_last + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); + } + } else if (ompt_enabled.ompt_callback_nest_lock) { + // release_lock_previous + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_end, (ompt_wait_id_t)lck, codeptr); + } + } +#endif + return; #else lck = (kmp_user_lock_p)user_lock; @@ -2298,17 +2836,22 @@ int release_status; release_status = RELEASE_NESTED_LOCK(lck, gtid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { if (release_status == KMP_LOCK_RELEASED) { - if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)( - (uint64_t)lck); + if (ompt_enabled.ompt_callback_mutex_released) { + // release_lock_last + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); } - } else if (ompt_callbacks.ompt_callback( - ompt_event_release_nest_lock_prev)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)( - (uint64_t)lck); + } else if (ompt_enabled.ompt_callback_nest_lock) { + // release_lock_previous + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_end, (ompt_wait_id_t)lck, codeptr); } } #endif @@ -2326,6 +2869,18 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif #if KMP_USE_INLINED_TAS if (tag == locktag_tas && !__kmp_env_consistency_check) { KMP_TEST_TAS_LOCK(user_lock, gtid, rc); @@ -2342,6 +2897,12 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); #endif 
+#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif return FTN_TRUE; } else { #if USE_ITT_BUILD @@ -2372,6 +2933,17 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)lck, codeptr); + } +#endif rc = TEST_LOCK(lck, gtid); #if USE_ITT_BUILD @@ -2381,6 +2953,13 @@ __kmp_itt_lock_cancelled(lck); } #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (rc && ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); + } +#endif + return (rc ? FTN_TRUE : FTN_FALSE); /* Can't use serial interval since not block structured */ @@ -2395,6 +2974,18 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); #if USE_ITT_BUILD if (rc) { @@ -2403,6 +2994,23 @@ __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); } #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled && rc) { + if (rc == 1) { + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } + } else { + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); + } + } + } +#endif return rc; #else // KMP_USE_DYNAMIC_LOCK @@ -2430,6 +3038,19 @@ __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled && + ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); + } +#endif + rc = TEST_NESTED_LOCK(lck, gtid); #if USE_ITT_BUILD if (rc) { @@ -2438,6 +3059,23 @@ __kmp_itt_lock_cancelled(lck); } #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled && rc) { + if (rc == 1) { + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); + } + } else { + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next + 
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); + } + } + } +#endif return rc; /* Can't use serial interval since not block structured */ @@ -2697,6 +3335,19 @@ // this barrier should be invisible to a customer and to the threading profile // tool (it's neither a terminating barrier nor customer's code, it's // used for an internal purpose) +#if OMPT_SUPPORT + // JP: can this barrier potentially lead to task scheduling? + // JP: as long as there is a barrier in the implementation, OMPT should and + // will provide the barrier events + // so we set up the necessary frame/return addresses. + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif @@ -2704,6 +3355,11 @@ __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), global_tid, FALSE, reduce_size, reduce_data, reduce_func); retval = (retval != 0) ? (0) : (1); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif // all other workers except master should do this pop here // ( none of other workers will get to __kmpc_end_reduce_nowait() ) @@ -2859,6 +3515,15 @@ // case tree_reduce_block: // this barrier should be visible to a customer and to the threading profile // tool (it's a terminating barrier on constructs if NOWAIT not specified) +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames @@ -2867,6 +3532,11 @@ __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), global_tid, TRUE, reduce_size, reduce_data, reduce_func); retval = (retval != 0) ? 
(0) : (1); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif // all other workers except master should do this pop here // ( none of other workers except master will enter __kmpc_end_reduce() ) @@ -2916,28 +3586,70 @@ __kmp_end_critical_section_reduce_block(loc, global_tid, lck); // TODO: implicit barrier: should be exposed +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } else if (packed_reduction_method == empty_reduce_block) { // usage: if team size==1, no synchronization is required (Intel platforms only) // TODO: implicit barrier: should be exposed +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } else if (packed_reduction_method == atomic_reduce_block) { +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif // TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) { Index: runtime/src/kmp_dispatch.cpp =================================================================== --- runtime/src/kmp_dispatch.cpp +++ runtime/src/kmp_dispatch.cpp @@ -1230,12 +1230,16 @@ } #endif // ( KMP_STATIC_STEAL_ENABLED ) -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + kmp_info_t *thr = __kmp_threads[gtid]; + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), + tc, // TODO: OMPT: verify loop count value (OpenMP-spec 4.6.2.18) + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif } @@ -1390,16 +1394,18 @@ /* Define 
a macro for exiting __kmp_dispatch_next(). If status is 0 (no more work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini() is not called. */ -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL #define OMPT_LOOP_END \ if (status == 0) { \ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \ + if (ompt_enabled.ompt_callback_work) { \ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \ - ompt_callbacks.ompt_callback(ompt_event_loop_end)( \ - team_info->parallel_id, task_info->task_id); \ + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \ + ompt_callbacks.ompt_callback(ompt_callback_work)( \ + ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \ + &(task_info->task_data), 0, codeptr); \ } \ } +// TODO: implement count #else #define OMPT_LOOP_END // no-op #endif @@ -1407,7 +1413,12 @@ template static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, - typename traits_t::signed_t *p_st) { + typename traits_t::signed_t *p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + void *codeptr +#endif + ) { typedef typename traits_t::unsigned_t UT; typedef typename traits_t::signed_t ST; @@ -2527,6 +2538,9 @@ enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } /*! @@ -2536,6 +2550,9 @@ enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2546,6 +2563,9 @@ enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2556,6 +2576,9 @@ enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2573,6 +2596,9 @@ kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2582,6 +2608,9 @@ kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2591,6 +2620,9 @@ kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2600,6 +2632,9 @@ kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if 
OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2619,7 +2654,15 @@ */ int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! @@ -2628,7 +2671,15 @@ int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! @@ -2636,7 +2687,15 @@ */ int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! @@ -2645,7 +2704,15 @@ int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! 
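Note on the tool side of the events added above: the ompt_callback_work and ompt_callback_mutex_* invocations only fire when a tool has registered a handler, so the new branches reduce to a flag test otherwise. A minimal, self-contained sketch of such handlers follows. The parameter lists are inferred from the call sites in this patch; the *_stub_t typedefs and the handler names are illustrative stand-ins, not the declarations from the runtime's ompt.h (a real tool would include that header and register its handlers through ompt_set_callback from its initializer instead of declaring its own types).

    /* tool_sketch.c -- illustrative only; mirrors the call sites above, not the
       official ompt.h declarations. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t ompt_wait_id_stub_t; /* stand-in for ompt_wait_id_t */
    typedef union {
      uint64_t value;
      void *ptr;
    } ompt_data_stub_t; /* stand-in for ompt_data_t */

    /* Receives (work type, scope endpoint, parallel_data, task_data, count,
       codeptr_ra), i.e. the argument list used at the ompt_callback_work call
       sites in __kmp_dispatch_init/__kmp_dispatch_next and
       __kmpc_for_static_fini. */
    static void on_work(int wstype, int endpoint, ompt_data_stub_t *parallel_data,
                        ompt_data_stub_t *task_data, uint64_t count,
                        const void *codeptr_ra) {
      (void)parallel_data;
      (void)task_data;
      printf("work: type=%d endpoint=%d count=%" PRIu64 " ra=%p\n", wstype,
             endpoint, count, codeptr_ra);
    }

    /* Receives (mutex kind, wait id, codeptr_ra) as passed by the
       __kmpc_critical/__kmpc_ordered/lock entry points. */
    static void on_mutex_acquired(int kind, ompt_wait_id_stub_t wait_id,
                                  const void *codeptr_ra) {
      printf("mutex acquired: kind=%d wait_id=%" PRIu64 " ra=%p\n", kind, wait_id,
             codeptr_ra);
    }

    int main(void) { /* exercise the handlers with dummy values */
      ompt_data_stub_t par = {0}, task = {0};
      on_work(1 /* loop */, 1 /* begin */, &par, &task, 100, (void *)0);
      on_mutex_acquired(1 /* lock */, 42, (void *)0);
      return 0;
    }

The codeptr_ra value a handler sees is the address captured by OMPT_STORE_RETURN_ADDRESS/OMPT_GET_RETURN_ADDRESS at the __kmpc_* entry points, which lets a tool attribute the event to the user code that entered the runtime rather than to libomp internals; that is why the dispatch entry points above now store the return address before forwarding to __kmp_dispatch_init/__kmp_dispatch_next.
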
Index: runtime/src/kmp_ftn_entry.h =================================================================== --- runtime/src/kmp_ftn_entry.h +++ runtime/src/kmp_ftn_entry.h @@ -21,6 +21,10 @@ #include "kmp_i18n.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -340,6 +344,26 @@ #endif } +#if OMP_50_ENABLED +int FTN_STDCALL xexpand(FTN_CONTROL_TOOL)(uint64_t command, uint64_t modifier, + void *arg) { +#if defined(KMP_STUB) || !OMPT_SUPPORT + return -2; +#else + OMPT_STORE_RETURN_ADDRESS(__kmp_entry_gtid()); + if (!TCR_4(__kmp_init_middle)) { + return -2; + } + kmp_info_t *this_thr = __kmp_threads[__kmp_entry_gtid()]; + ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr); + parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + int ret = __kmp_control_tool(command, modifier, arg); + parent_task_info->frame.reenter_runtime_frame = 0; + return ret; +#endif +} +#endif + int FTN_STDCALL xexpand(FTN_GET_THREAD_NUM)(void) { #ifdef KMP_STUB return 0; @@ -873,8 +897,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock, - KMP_DEREF hint); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint); #endif } @@ -883,8 +910,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_nest_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock, - KMP_DEREF hint); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_nest_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint); #endif } #endif @@ -894,7 +924,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_lock(NULL, gtid, user_lock); #endif } @@ -903,7 +937,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_nest_lock(NULL, gtid, user_lock); #endif } @@ -911,7 +949,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNINIT; #else - __kmpc_destroy_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_destroy_lock(NULL, gtid, user_lock); #endif } @@ -919,7 +961,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNINIT; #else - __kmpc_destroy_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_destroy_nest_lock(NULL, gtid, user_lock); #endif } @@ -933,7 +979,11 @@ } *((kmp_stub_lock_t *)user_lock) = LOCKED; #else - __kmpc_set_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_set_lock(NULL, gtid, user_lock); #endif } @@ -944,7 +994,11 @@ } (*((int *)user_lock))++; #else - __kmpc_set_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); 
+#endif + __kmpc_set_nest_lock(NULL, gtid, user_lock); #endif } @@ -958,7 +1012,11 @@ } *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_unset_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_unset_lock(NULL, gtid, user_lock); #endif } @@ -972,7 +1030,11 @@ } (*((int *)user_lock))--; #else - __kmpc_unset_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_unset_nest_lock(NULL, gtid, user_lock); #endif } @@ -987,7 +1049,11 @@ *((kmp_stub_lock_t *)user_lock) = LOCKED; return 1; #else - return __kmpc_test_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmpc_test_lock(NULL, gtid, user_lock); #endif } @@ -998,7 +1064,11 @@ } return ++(*((int *)user_lock)); #else - return __kmpc_test_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmpc_test_nest_lock(NULL, gtid, user_lock); #endif } @@ -1214,6 +1284,7 @@ #if OMP_50_ENABLED // OMP_5.0 aliases +xaliasify(FTN_CONTROL_TOOL, 50); #endif // OMP_1.0 versioned symbols @@ -1285,6 +1356,7 @@ #if OMP_50_ENABLED // OMP_5.0 versioned symbols +xversionify(FTN_CONTROL_TOOL, 50, "OMP_5.0"); #endif #endif // KMP_USE_VERSION_SYMBOLS Index: runtime/src/kmp_ftn_os.h =================================================================== --- runtime/src/kmp_ftn_os.h +++ runtime/src/kmp_ftn_os.h @@ -51,6 +51,9 @@ #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads +#if OMPT_SUPPORT +#define FTN_CONTROL_TOOL omp_control_tool +#endif #define FTN_SET_NUM_THREADS omp_set_num_threads #define FTN_GET_NUM_THREADS omp_get_num_threads #define FTN_GET_MAX_THREADS omp_get_max_threads @@ -251,6 +254,10 @@ #endif #endif +#if OMP_50_ENABLED +#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL +#endif + #endif /* KMP_FTN_APPEND */ /* ------------------------------------------------------------------------ */ @@ -369,6 +376,10 @@ #endif #endif +#if OMP_50_ENABLED +#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL +#endif + #endif /* KMP_FTN_UPPER */ /* ------------------------------------------------------------------------ */ @@ -405,6 +416,9 @@ #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_ +#if OMPT_SUPPORT +#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL_ +#endif #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_ #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_ #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_ Index: runtime/src/kmp_global.cpp =================================================================== --- runtime/src/kmp_global.cpp +++ runtime/src/kmp_global.cpp @@ -303,6 +303,10 @@ kmp_uint64 __kmp_taskloop_min_tasks = 0; #endif +#if OMP_50_ENABLED && OMPT_SUPPORT +char const *__kmp_tool_libraries = NULL; +#endif + /* This check ensures that the compiler is passing the correct data type for the flags formal parameter of the function kmpc_omp_task_alloc(). 
If the type is not a 4-byte type, then give an error message about a non-positive length Index: runtime/src/kmp_gsupport.cpp =================================================================== --- runtime/src/kmp_gsupport.cpp +++ runtime/src/kmp_gsupport.cpp @@ -31,14 +31,20 @@ int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_barrier"); KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL ompt_frame_t *ompt_frame; - if (ompt_enabled) { - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } #endif __kmpc_barrier(&loc, gtid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } // Mutual exclusion @@ -56,6 +62,9 @@ int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_critical_start"); KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); } @@ -63,6 +72,9 @@ int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_critical_end"); KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); } @@ -111,7 +123,42 @@ // 3rd parameter == FALSE prevents kmp_enter_single from pushing a // workshare when USE_CHECKS is defined. We need to avoid the push, // as there is no corresponding GOMP_single_end() call. - return __kmp_enter_single(gtid, &loc, FALSE); + kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = this_thr->th.th_team; + int tid = __kmp_tid_from_gtid(gtid); + + if (ompt_enabled.enabled) { + if (rc) { + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_executor, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + } + } else { + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_end, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + } + // this_thr->th.ompt_thread_info.state = + // omp_state_work_parallel; + } + } +#endif + + return rc; } void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { @@ -129,14 +176,33 @@ if (__kmp_enter_single(gtid, &loc, FALSE)) return NULL; - // Wait for the first thread to set the copyprivate data pointer, - // and for all other threads to reach this point. +// Wait for the first thread to set the copyprivate data pointer, +// and for all other threads to reach this point. 
+ +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); // Retrieve the value of the copyprivate data point, and wait for all // threads to do likewise, then return. retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif return retval; } @@ -149,14 +215,35 @@ // continuing, so that the know that the copyprivate data pointer has been // propagated to all threads before trying to reuse the t_copypriv_data field. __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } void xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_ordered_start"); KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_ordered(&loc, gtid); } @@ -164,6 +251,9 @@ int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_ordered_end"); KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_ordered(&loc, gtid); } @@ -197,26 +287,26 @@ #if OMPT_SUPPORT kmp_info_t *thr; ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; + omp_state_t enclosing_state; - if (ompt_enabled) { + if (ompt_enabled.enabled) { // get pointer to thread data structure thr = __kmp_threads[*gtid]; // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif task(data); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // clear task frame ompt_frame->exit_runtime_frame = NULL; @@ -236,24 +326,29 @@ enum sched_type schedule, long start, long end, long incr, long chunk_size) { - // Intialize the loop worksharing construct. +// Intialize the loop worksharing construct. 
+ +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(*gtid); +#endif KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, schedule != kmp_sch_static); #if OMPT_SUPPORT kmp_info_t *thr; ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; + omp_state_t enclosing_state; - if (ompt_enabled) { + if (ompt_enabled.enabled) { thr = __kmp_threads[*gtid]; // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -261,7 +356,7 @@ task(data); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // clear task frame ompt_frame->exit_runtime_frame = NULL; @@ -285,11 +380,8 @@ va_list ap; va_start(ap, argc); - rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) unwrapped_task, -#endif - wrapper, __kmp_invoke_task_func, + rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, + __kmp_invoke_task_func, #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX &ap #else @@ -304,18 +396,19 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_TRACE + int ompt_team_size; + if (ompt_enabled.enabled) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - team_info->parallel_id, task_info->task_id); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid)); } -#endif - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; } #endif } @@ -323,47 +416,9 @@ static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *)) { #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id; - if (ompt_enabled) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - - ompt_parallel_id = __ompt_parallel_id_new(gtid); - - // parallel region callback - if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = 1; - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - task_info->task_id, &task_info->frame, ompt_parallel_id, team_size, - (void *)task, OMPT_INVOKER(fork_context_gnu)); - } - } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif - __kmp_serialized_parallel(loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - kmp_info_t *thr = __kmp_threads[gtid]; - - ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid); - - // set up lightweight task - ompt_lw_taskteam_t *lwt = - (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, thr, gtid, (void *)task, ompt_parallel_id); - lwt->ompt_task_info.task_id = my_ompt_task_id; - __ompt_lw_taskteam_link(lwt, thr); - -#if OMPT_TRACE - // implicit 
task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_ompt_task_id); - } - thr->th.ompt_thread_info.state = ompt_state_work_parallel; -#endif - } -#endif } void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, @@ -373,10 +428,11 @@ #if OMPT_SUPPORT ompt_frame_t *parent_frame, *frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_start"); @@ -394,9 +450,9 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { - frame = __ompt_get_task_frame_internal(0); - frame->exit_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); + frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } #endif } @@ -404,44 +460,23 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { int gtid = __kmp_get_gtid(); kmp_info_t *thr; + int ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; thr = __kmp_threads[gtid]; MKLOC(loc, "GOMP_parallel_end"); KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); -#if OMPT_SUPPORT - ompt_parallel_id_t parallel_id; - ompt_task_id_t serialized_task_id; - ompt_frame_t *ompt_frame = NULL; - - if (ompt_enabled) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - parallel_id = team_info->parallel_id; - - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - serialized_task_id = task_info->task_id; - - // unlink if necessary. no-op if there is not a lightweight task. - ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr); - // GOMP allocates/frees lwt since it can't be kept on the stack - if (lwt) { - __kmp_free(lwt); - } - } -#endif - if (!thr->th.th_team->t.t_serialized) { __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, thr->th.th_team); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // Implicit task is finished here, in the barrier we might schedule // deferred tasks, // these don't see the implicit task on the stack - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = NULL; + OMPT_CUR_TASK_INFO(thr)->frame.exit_runtime_frame = NULL; } #endif @@ -452,35 +487,7 @@ #endif ); } else { -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, serialized_task_id); - } -#endif - __kmpc_end_serialized_parallel(&loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // Record that we re-entered the runtime system in the frame that - // created the parallel region. - ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0); - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, parent_task_info->task_id, - OMPT_INVOKER(fork_context_gnu)); - } - - parent_task_info->frame.reenter_runtime_frame = NULL; - - thr->th.ompt_thread_info.state = - (((thr->th.th_team)->t.t_serialized) ? ompt_state_work_serial - : ompt_state_work_parallel); - } -#endif } } @@ -508,6 +515,12 @@ // num and calculate the iteration space using the result. 
It doesn't do this // with ordered static loop, so they can be checked. +#if OMPT_SUPPORT +#define IF_OMPT_SUPPORT(code) code +#else +#define IF_OMPT_SUPPORT(code) +#endif + #define LOOP_START(func, schedule) \ int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \ long *p_ub) { \ @@ -520,9 +533,11 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -551,8 +566,10 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -577,6 +594,7 @@ MKLOC(loc, #func); \ KA_TRACE(20, (#func ": T#%d\n", gtid)); \ \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -621,7 +639,20 @@ int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) } @@ -796,17 +827,18 @@ KA_TRACE(20, (#func " exit: T#%d\n", gtid)); \ } -#if OMPT_SUPPORT +#if OMPT_SUPPORT && OMPT_OPTIONAL #define OMPT_LOOP_PRE() \ ompt_frame_t *parent_frame; \ - if (ompt_enabled) { \ - parent_frame = __ompt_get_task_frame_internal(0); \ - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); \ - } + if (ompt_enabled.enabled) { \ + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); \ + } \ + OMPT_STORE_RETURN_ADDRESS(gtid); #define OMPT_LOOP_POST() \ - if (ompt_enabled) { \ + if (ompt_enabled.enabled) { \ parent_frame->reenter_runtime_frame = NULL; \ } @@ -878,6 +910,16 @@ } } +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); + kmp_taskdata_t *current_task; + if (ompt_enabled.enabled) { + current_task = __kmp_threads[gtid]->th.th_current_task; + current_task->ompt_task_info.frame.reenter_runtime_frame = + OMPT_GET_FRAME_ADDRESS(1); + } +#endif + if (if_cond) { #if OMP_40_ENABLED if (gomp_flags & 8) { @@ -893,24 +935,28 @@ dep_list[i].flags.out = (i < nout); } __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL); - } else + } else { #endif + // TODO: Intel, why call kmpc and not kmp here? 
__kmpc_omp_task(&loc, gtid, task); + } } else { #if OMPT_SUPPORT ompt_thread_info_t oldInfo; kmp_info_t *thread; kmp_taskdata_t *taskdata; - if (ompt_enabled) { + kmp_taskdata_t *current_task; + if (ompt_enabled.enabled) { // Store the threads states and restore them after the task thread = __kmp_threads[gtid]; taskdata = KMP_TASK_TO_TASKDATA(task); oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; + thread->th.ompt_thread_info.state = omp_state_work_parallel; taskdata->ompt_task_info.frame.exit_runtime_frame = - __builtin_frame_address(0); + OMPT_GET_FRAME_ADDRESS(0); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif __kmpc_omp_task_begin_if0(&loc, gtid, task); @@ -918,12 +964,17 @@ __kmpc_omp_task_complete_if0(&loc, gtid, task); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { thread->th.ompt_thread_info = oldInfo; taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; } #endif } +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + current_task->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); } @@ -1001,10 +1052,11 @@ #if OMPT_SUPPORT ompt_frame_t *parent_frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_sections_start"); @@ -1023,7 +1075,7 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { parent_frame->reenter_runtime_frame = NULL; } #endif @@ -1037,7 +1089,20 @@ int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) } @@ -1063,10 +1128,11 @@ #if OMPT_SUPPORT ompt_task_info_t *parent_task_info, *task_info; - if (ompt_enabled) { - parent_task_info = __ompt_get_taskinfo(0); - parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + parent_task_info = __ompt_get_task_info_object(0); + parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { if (num_threads != 0) { @@ -1082,15 +1148,20 @@ __kmp_GOMP_serialized_parallel(&loc, gtid, task); } #if OMPT_SUPPORT - if (ompt_enabled) { - task_info = __ompt_get_taskinfo(0); - task_info->frame.exit_runtime_frame = __builtin_frame_address(0); + if (ompt_enabled.enabled) { + task_info = __ompt_get_task_info_object(0); + task_info->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif task(data); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { task_info->frame.exit_runtime_frame = NULL; 
parent_task_info->frame.reenter_runtime_frame = NULL; } @@ -1106,6 +1177,10 @@ MKLOC(loc, "GOMP_parallel_sections"); KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { if (num_threads != 0) { __kmp_push_num_threads(&loc, gtid, num_threads); @@ -1128,6 +1203,12 @@ KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); } +#if OMPT_SUPPORT +#define INCLUDE_IF_OMPT_SUPPORT(code) code +#else +#define INCLUDE_IF_OMPT_SUPPORT(code) +#endif + #define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \ void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \ long ub, long str, long chunk_sz, unsigned flags) { \ @@ -1153,6 +1234,8 @@ __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ } \ \ + INCLUDE_IF_OMPT_SUPPORT(if (ompt_enabled.enabled) \ + OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ Index: runtime/src/kmp_lock.h =================================================================== --- runtime/src/kmp_lock.h +++ runtime/src/kmp_lock.h @@ -1142,7 +1142,7 @@ // with/without consistency checking. extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *); -extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); @@ -1150,7 +1150,7 @@ // with/withuot consistency checking. extern void (*__kmp_indirect_init[])(kmp_user_lock_p); extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p); -extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); +extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); Index: runtime/src/kmp_lock.cpp =================================================================== --- runtime/src/kmp_lock.cpp +++ runtime/src/kmp_lock.cpp @@ -1135,7 +1135,7 @@ kmp_int32 need_mf = 1; #if OMPT_SUPPORT - ompt_state_t prev_state = ompt_state_undefined; + omp_state_t prev_state = omp_state_undefined; #endif KA_TRACE(1000, @@ -1243,7 +1243,7 @@ #endif #if OMPT_SUPPORT - if (ompt_enabled && prev_state != ompt_state_undefined) { + if (ompt_enabled.enabled && prev_state != omp_state_undefined) { /* change the state before clearing wait_id */ this_thr->th.ompt_thread_info.state = prev_state; this_thr->th.ompt_thread_info.wait_id = 0; @@ -1258,11 +1258,11 @@ } #if OMPT_SUPPORT - if (ompt_enabled && prev_state == ompt_state_undefined) { + if (ompt_enabled.enabled && prev_state == omp_state_undefined) { /* this thread will spin; set wait_id before entering wait state */ prev_state = this_thr->th.ompt_thread_info.state; this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; - this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; + this_thr->th.ompt_thread_info.state = omp_state_wait_lock; } #endif @@ -2911,11 +2911,11 @@ static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, kmp_dyna_lockseq_t tag); static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); -static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); +static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); -static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32); +static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32); static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, @@ -2938,14 +2938,13 @@ // set/acquire functions #define expand(l, op) \ - 0, (void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { + 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, +static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand #define expand(l, op) \ - 0, (void (*)(kmp_dyna_lock_t *, \ - kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { + 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, +static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand @@ -2968,7 +2967,7 @@ #undef expand // Exposes only one set of jump tables (*lock or *lock_with_checks). -void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; +int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; @@ -2982,13 +2981,13 @@ // set/acquire functions #define expand(l, op) \ - (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, -static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { - KMP_FOREACH_I_LOCK(expand, acquire)}; + (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, +static int (*indirect_set[])(kmp_user_lock_p, + kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand #define expand(l, op) \ - (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, -static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { + (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, +static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand @@ -3009,7 +3008,7 @@ #undef expand // Exposes only one jump tables (*lock or *lock_with_checks). 
-void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; +int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; @@ -3164,9 +3163,9 @@ __kmp_release_lock(&__kmp_global_lock, gtid); } -static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { +static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); + return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { @@ -3179,11 +3178,11 @@ return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); } -static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32 gtid) { +static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32 gtid) { kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); + return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -722,16 +722,6 @@ /* TODO replace with general release procedure */ team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - /* accept blame for "ordered" waiting */ - kmp_info_t *this_thread = __kmp_threads[gtid]; - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - this_thread->th.ompt_thread_info.wait_id); - } -#endif - KMP_MB(); /* Flush all pending memory write invalidates. */ } #endif /* BUILD_PARALLEL_ORDERED */ @@ -1107,10 +1097,11 @@ // There is no point looking at t_fp_control_saved here. // If it is TRUE, we still have to update the values if they are different - // from those we now have. + // from + // those we now have. // If it is FALSE we didn't save anything yet, but our objective is the - // same. We have to ensure that the values in the team are the same as those - // we have. + // same. We + // have to ensure that the values in the team are the same as those we have. // So, this code achieves what we need whether or not t_fp_control_saved is // true. 
By checking whether the value needs updating we avoid unnecessary // writes that would put the cache-line into a written state, causing all @@ -1205,6 +1196,37 @@ this_thr->th.th_set_proc_bind = proc_bind_default; #endif /* OMP_40_ENABLED */ +#if OMPT_SUPPORT + ompt_data_t ompt_parallel_data; + ompt_parallel_data.ptr = NULL; + ompt_data_t *implicit_task_data; + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); + if (ompt_enabled.enabled && + this_thr->th.ompt_thread_info.state != omp_state_overhead) { + + ompt_task_info_t *parent_task_info; + // if (serial_team->t.t_level > 1) + parent_task_info = OMPT_CUR_TASK_INFO(this_thr); + // else + // parent_task_info = + // &(this_thr->th.th_current_task->td_parent->ompt_task_info); + + parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + // printf("281474976710657: %p frame\n", + // OMPT_CUR_TASK_INFO(this_thr)->frame.exit_runtime_frame); + if (ompt_enabled.ompt_callback_parallel_begin) { + int team_size = 1; + + ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( + &(parent_task_info->task_data), &(parent_task_info->frame), + &ompt_parallel_data, team_size, + // master_set_numthreads ? master_set_numthreads : get__nproc_2( + // parent_team, master_tid ), + ompt_invoker_program, codeptr); + } + } +#endif // OMPT_SUPPORT + if (this_thr->th.th_team != serial_team) { // Nested level will be an index in the nested nthreads array int level = this_thr->th.th_team->t.t_level; @@ -1216,13 +1238,9 @@ __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); -#endif - new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_data, #endif #if OMP_40_ENABLED proc_bind, @@ -1317,11 +1335,6 @@ } this_thr->th.th_dispatch = serial_team->t.t_dispatch; -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); - __ompt_team_assign_id(serial_team, ompt_parallel_id); -#endif - KMP_MB(); } else { @@ -1365,17 +1378,41 @@ if (__kmp_env_consistency_check) __kmp_push_parallel(global_tid, NULL); +#if OMPT_SUPPORT + serial_team->t.ompt_team_info.master_return_address = codeptr; + if (ompt_enabled.enabled && + this_thr->th.ompt_thread_info.state != omp_state_overhead) { + OMPT_CUR_TASK_INFO(this_thr) + ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + + ompt_lw_taskteam_t lw_taskteam; + __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, + &ompt_parallel_data, codeptr); + + __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1); + // don't use lw_taskteam after linking. content was swaped + + /* OMPT implicit task begin */ + implicit_task_data = OMPT_CUR_TASK_DATA(this_thr); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), + OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid)); + } + + /* OMPT state */ + this_thr->th.ompt_thread_info.state = omp_state_work_parallel; + OMPT_CUR_TASK_INFO(this_thr) + ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + } +#endif } /* most of the work for a fork */ /* return true if we really went parallel, false if serialized */ int __kmp_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context, // Intel, GNU, ... 
- kmp_int32 argc, -#if OMPT_SUPPORT - void *unwrapped_task, -#endif - microtask_t microtask, launch_t invoker, + kmp_int32 argc, microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX va_list *ap @@ -1433,16 +1470,18 @@ master_set_numthreads = master_th->th.th_set_nproc; #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id; - ompt_task_id_t ompt_task_id; + ompt_data_t ompt_parallel_data; + ompt_parallel_data.ptr = NULL; + ompt_data_t *parent_task_data; ompt_frame_t *ompt_frame; - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; + ompt_data_t *implicit_task_data; + void *return_address = NULL; - if (ompt_enabled) { - ompt_parallel_id = __ompt_parallel_id_new(gtid); - ompt_task_id = __ompt_get_task_id_internal(0); - ompt_frame = __ompt_get_task_frame_internal(0); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, + NULL, NULL); + // ompt_parallel_data = &(team->t.ompt_team_info.parallel_data); + return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); } #endif @@ -1466,13 +1505,18 @@ #endif #if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = master_set_numthreads; - - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task, - OMPT_INVOKER(call_context)); + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_parallel_begin) { + int team_size = master_set_numthreads + ? master_set_numthreads + : get__nproc_2(parent_team, master_tid); + ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( + parent_task_data, ompt_frame, &ompt_parallel_data, team_size, + // master_set_numthreads ? master_set_numthreads : get__nproc_2( + // parent_team, master_tid ), + OMPT_INVOKER(call_context), return_address); + } + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -1509,27 +1553,25 @@ ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task, - ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + if (ompt_enabled.enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + &ompt_parallel_data, return_address); exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - __ompt_lw_taskteam_link(&lw_taskteam, master_th); + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); + // don't use lw_taskteam after linking. 
content was swaped -#if OMPT_TRACE /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = parent_team->t.ompt_team_info.parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); + implicit_task_data = OMPT_CUR_TASK_DATA(master_th); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); } -#endif /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1548,34 +1590,27 @@ #if OMPT_SUPPORT *exit_runtime_p = NULL; - if (ompt_enabled) { -#if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); + if (ompt_enabled.enabled) { + OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, implicit_task_data, 1, + __kmp_tid_from_gtid(gtid)); } - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; -#endif - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th), + OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = ompt_state_overhead; + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif return TRUE; } parent_team->t.t_pkfn = microtask; -#if OMPT_SUPPORT - parent_team->t.ompt_team_info.microtask = unwrapped_task; -#endif parent_team->t.t_invoke = invoker; KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel); parent_team->t.t_active_level++; @@ -1727,28 +1762,27 @@ #if OMPT_SUPPORT void *dummy; void **exit_runtime_p; + ompt_task_info_t *task_info; ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { + if (ompt_enabled.enabled) { __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = - &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th); - -#if OMPT_TRACE - my_task_id = lw_taskteam.ompt_task_info.task_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_task_id); + &ompt_parallel_data, return_address); + + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); + // don't use lw_taskteam after linking. 
content was swaped + + task_info = OMPT_CUR_TASK_INFO(master_th); + exit_runtime_p = &(task_info->frame.exit_runtime_frame); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid)); } -#endif /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1767,26 +1801,21 @@ } #if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled) { - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - -#if OMPT_TRACE - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); + if (ompt_enabled.enabled) { + exit_runtime_p = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), 1, + __kmp_tid_from_gtid(gtid)); } -#endif __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + OMPT_CUR_TEAM_DATA(master_th), parent_task_data, + OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = ompt_state_overhead; + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif } else if (microtask == (microtask_t)__kmp_teams_master) { @@ -1835,30 +1864,28 @@ #if OMPT_SUPPORT void *dummy; void **exit_runtime_p; + ompt_task_info_t *task_info; ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { + if (ompt_enabled.enabled) { __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = - &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th); + &ompt_parallel_data, return_address); + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); + // don't use lw_taskteam after linking. 
content was swaped + task_info = OMPT_CUR_TASK_INFO(master_th); + exit_runtime_p = &(task_info->frame.exit_runtime_frame); -#if OMPT_TRACE /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = ompt_parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); + implicit_task_data = OMPT_CUR_TASK_DATA(master_th); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); } -#endif /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1876,26 +1903,22 @@ } #if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled) { -#if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, my_task_id); + if (ompt_enabled.enabled) { + *exit_runtime_p = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), 1, + __kmp_tid_from_gtid(gtid)); } -#endif + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + &ompt_parallel_data, parent_task_data, + OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = ompt_state_overhead; + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif #if OMP_40_ENABLED @@ -1903,14 +1926,13 @@ #endif /* OMP_40_ENABLED */ } else if (call_context == fork_context_gnu) { #if OMPT_SUPPORT - ompt_lw_taskteam_t *lwt = - (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task, - ompt_parallel_id); + ompt_lw_taskteam_t lwt; + __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, + return_address); - lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); - lwt->ompt_task_info.frame.exit_runtime_frame = NULL; - __ompt_lw_taskteam_link(lwt, master_th); + lwt.ompt_task_info.frame.exit_runtime_frame = NULL; + __ompt_lw_taskteam_link(&lwt, master_th, 1); +// don't use lw_taskteam after linking. 
content was swaped #endif // we were called from GNU native code @@ -2005,7 +2027,7 @@ KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); team = __kmp_allocate_team(root, nthreads, nthreads, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_data, #endif #if OMP_40_ENABLED proc_bind, @@ -2016,7 +2038,7 @@ KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); team = __kmp_allocate_team(root, nthreads, nthreads, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_data, #endif #if OMP_40_ENABLED proc_bind, @@ -2034,7 +2056,8 @@ KMP_CHECK_UPDATE(team->t.t_parent, parent_team); KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); #if OMPT_SUPPORT - KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); + KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, + return_address); #endif KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe // TODO: parent_team->t.t_level == INT_MAX ??? @@ -2168,7 +2191,7 @@ &master_th->th.th_current_task->td_icvs, loc); #if OMPT_SUPPORT - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; #endif __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); @@ -2252,8 +2275,8 @@ KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); #if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; + if (ompt_enabled.enabled) { + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -2265,17 +2288,18 @@ kmp_team_t *team) { // restore state outside the region thread->th.ompt_thread_info.state = - ((team->t.t_serialized) ? ompt_state_work_serial - : ompt_state_work_parallel); + ((team->t.t_serialized) ? omp_state_work_serial + : omp_state_work_parallel); } -static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team, - ompt_parallel_id_t parallel_id, - fork_context_e fork_context) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); +static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, + kmp_team_t *team, ompt_data_t *parallel_data, + fork_context_e fork_context, void *codeptr) { + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context), + codeptr); } task_info->frame.reenter_runtime_frame = NULL; @@ -2312,8 +2336,8 @@ master_th->th.th_ident = loc; #if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; + if (ompt_enabled.enabled) { + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -2350,7 +2374,7 @@ __kmpc_end_serialized_parallel(loc, gtid); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { __kmp_join_restore_state(master_th, parent_team); } #endif @@ -2378,7 +2402,8 @@ KMP_MB(); #if OMPT_SUPPORT - ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; + ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data); + void *codeptr = team->t.ompt_team_info.master_return_address; #endif #if USE_ITT_BUILD @@ -2450,8 +2475,9 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + if (ompt_enabled.enabled) { + 
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + codeptr); } #endif @@ -2480,15 +2506,18 @@ } KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, task_info->task_id); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_implicit_task) { + int ompt_team_size = team->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, + __kmp_tid_from_gtid(gtid)); } + task_info->frame.exit_runtime_frame = NULL; - task_info->task_id = 0; + task_info->task_data = ompt_data_none; } #endif @@ -2559,8 +2588,9 @@ __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); #if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + if (ompt_enabled.enabled) { + __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + codeptr); } #endif @@ -3155,7 +3185,7 @@ 1, // new_nproc 1, // max_nproc #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED __kmp_nested_proc_bind.bind_types[0], @@ -3196,7 +3226,7 @@ 1, // new_nproc __kmp_dflt_team_nth_ub * 2, // max_nproc #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED __kmp_nested_proc_bind.bind_types[0], @@ -3735,6 +3765,9 @@ __kmp_print_thread_storage_map(root_thread, gtid); } root_thread->th.th_info.ds.ds_gtid = gtid; +#if OMPT_SUPPORT + root_thread->th.ompt_thread_info.thread_data.ptr = NULL; +#endif root_thread->th.th_root = root; if (__kmp_env_consistency_check) { root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); @@ -3757,7 +3790,7 @@ root_thread->th.th_serial_team = __kmp_allocate_team(root, 1, 1, #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED proc_bind_default, @@ -3827,6 +3860,29 @@ __kmp_root_counter++; +#if OMPT_SUPPORT + if (!initial_thread && ompt_enabled.enabled) { + + ompt_thread_t *root_thread = ompt_get_thread(); + + ompt_set_thread_state(root_thread, omp_state_overhead); + + if (ompt_enabled.ompt_callback_thread_begin) { + ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( + ompt_thread_initial, __ompt_get_thread_data_internal()); + } + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); + if (ompt_enabled.ompt_callback_task_create) { + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + NULL, NULL, task_data, ompt_task_initial, 0, NULL); + // initial task has nothing to return to + } + + ompt_set_thread_state(root_thread, omp_state_work_serial); + } +#endif + KMP_MB(); __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); @@ -3910,9 +3966,9 @@ #endif /* KMP_OS_WINDOWS */ #if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - int gtid = __kmp_get_gtid(); - __ompt_thread_end(ompt_thread_initial, gtid); + if (ompt_enabled.ompt_callback_thread_end) { + ompt_callbacks.ompt_callback(ompt_callback_thread_end)( + &(root->r.r_uber_thread->th.ompt_thread_info.thread_data)); } #endif @@ -3962,7 +4018,7 @@ if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { #if OMPT_SUPPORT // the 
runtime is shutting down so we won't report any events - thread->th.ompt_thread_info.state = ompt_state_undefined; + thread->th.ompt_thread_info.state = omp_state_undefined; #endif __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); } @@ -4283,7 +4339,7 @@ new_thr->th.th_serial_team = serial_team = (kmp_team_t *)__kmp_allocate_team(root, 1, 1, #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED proc_bind_default, @@ -4814,7 +4870,7 @@ kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_data_t ompt_parallel_data, #endif #if OMP_40_ENABLED kmp_proc_bind_t new_proc_bind, @@ -5181,7 +5237,7 @@ #endif #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_data); #endif KMP_MB(); @@ -5233,7 +5289,7 @@ team->t.t_id)); #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_data); #endif KMP_MB(); @@ -5297,7 +5353,7 @@ #endif #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_data); team->t.ompt_serialized_team_info = NULL; #endif @@ -5564,16 +5620,26 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; + ompt_data_t *thread_data; + if (ompt_enabled.enabled) { + thread_data = &(this_thr->th.ompt_thread_info.thread_data); + thread_data->ptr = NULL; + + this_thr->th.ompt_thread_info.state = omp_state_overhead; this_thr->th.ompt_thread_info.wait_id = 0; - this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - __ompt_thread_begin(ompt_thread_worker, gtid); + this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0); + if (ompt_enabled.ompt_callback_thread_begin) { + ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( + ompt_thread_worker, thread_data); } } #endif +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + this_thr->th.ompt_thread_info.state = omp_state_idle; + } +#endif /* This is the place where threads wait for work */ while (!TCR_4(__kmp_global.g.g_done)) { KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); @@ -5582,18 +5648,12 @@ /* wait for work to do */ KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid)); -#if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_idle; - } -#endif - /* No tid yet since not part of a team */ __kmp_fork_barrier(gtid, KMP_GTID_DNE); #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; + if (ompt_enabled.enabled) { + this_thr->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -5601,14 +5661,6 @@ /* have we been allocated? */ if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) { -#if OMPT_SUPPORT - ompt_task_info_t *task_info; - ompt_parallel_id_t my_parallel_id; - if (ompt_enabled) { - task_info = __ompt_get_taskinfo(0); - my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; - } -#endif /* we were just woken up, so run our new task */ if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) { int rc; @@ -5620,11 +5672,8 @@ updateHWFPControl(*pteam); #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - // Initialize OMPT task id for implicit task. 
- int tid = __kmp_tid_from_gtid(gtid); - task_info->task_id = __ompt_task_id_new(tid); + if (ompt_enabled.enabled) { + this_thr->th.ompt_thread_info.state = omp_state_work_parallel; } #endif @@ -5635,40 +5684,29 @@ } KMP_ASSERT(rc); -#if OMPT_SUPPORT - if (ompt_enabled) { - /* no frame set while outside task */ - task_info->frame.exit_runtime_frame = NULL; - - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif KMP_MB(); KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); } - /* join barrier after parallel region */ - __kmp_join_barrier(gtid); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - // don't access *pteam here: it may have already been freed - // by the master thread behind the barrier (possible race) - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, task_info->task_id); - } - task_info->frame.exit_runtime_frame = NULL; - task_info->task_id = 0; +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + /* no frame set while outside task */ + __ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL; + + this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); } #endif + /* join barrier after parallel region */ + __kmp_join_barrier(gtid); } } TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); #if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - __ompt_thread_end(ompt_thread_worker, gtid); + if (ompt_enabled.ompt_callback_thread_end) { + ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data); } #endif @@ -5905,7 +5943,8 @@ // Need to check that monitor was initialized before reaping it. If we are // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then // __kmp_monitor will appear to contain valid data, but it is only valid in - // the parent process, not the child. + // the + // parent process, not the child. // New behavior (201008): instead of keying off of the flag // __kmp_init_parallel, the monitor thread creation is keyed off // of the new flag __kmp_init_monitor. 
@@ -6926,26 +6965,27 @@ #if OMPT_SUPPORT void *dummy; void **exit_runtime_p; - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + int ompt_team_size; - if (ompt_enabled) { + if (ompt_enabled.enabled) { exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid] .ompt_task_info.frame.exit_runtime_frame); } else { exit_runtime_p = &dummy; } -#if OMPT_TRACE - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id, - my_task_id); + my_task_data = + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data); + my_parallel_data = &(team->t.ompt_team_info.parallel_data); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_team_size = team->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, + __kmp_tid_from_gtid(gtid)); } #endif -#endif { KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); @@ -6992,9 +7032,6 @@ SSC_MARK_FORKING(); #endif __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc, -#if OMPT_SUPPORT - (void *)thr->th.th_teams_microtask, // "unwrapped" task -#endif (microtask_t)thr->th.th_teams_microtask, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL); #if INCLUDE_SSC_MARKS @@ -7171,6 +7208,36 @@ #endif /* KMP_DEBUG */ __kmp_join_barrier(gtid); /* wait for everyone */ +#if OMPT_SUPPORT + int ds_tid = this_thr->th.th_info.ds.ds_tid; + if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) { + ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr); + ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr); + this_thr->th.ompt_thread_info.state = omp_state_overhead; +#if OMPT_OPTIONAL + void *codeptr = NULL; + if (KMP_MASTER_TID(ds_tid) && + (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || + ompt_callbacks.ompt_callback(ompt_callback_sync_region))) + codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address; + + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr); + } +#endif + if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, tId, 0, ds_tid); + } + // return to idle state + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } +#endif KMP_MB(); /* Flush all pending memory write invalidates. 
*/ KMP_ASSERT(this_thr->th.th_team == team); Index: runtime/src/kmp_sched.cpp =================================================================== --- runtime/src/kmp_sched.cpp +++ runtime/src/kmp_sched.cpp @@ -44,7 +44,12 @@ T *plower, T *pupper, typename traits_t::signed_t *pstride, typename traits_t::signed_t incr, - typename traits_t::signed_t chunk) { + typename traits_t::signed_t chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + void *codeptr +#endif + ) { KMP_COUNT_BLOCK(OMP_FOR_static); KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling); @@ -58,14 +63,29 @@ kmp_team_t *team; kmp_info_t *th = __kmp_threads[gtid]; -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL ompt_team_info_t *team_info = NULL; ompt_task_info_t *task_info = NULL; + ompt_work_type_t ompt_work_type; - if (ompt_enabled) { + if (ompt_enabled.enabled) { // Only fully initialize variables needed by OMPT if OMPT is enabled. team_info = __ompt_get_teaminfo(0, NULL); - task_info = __ompt_get_taskinfo(0); + task_info = __ompt_get_task_info_object(0); + // Determine workshare type + if (loc != NULL) { + if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { + ompt_work_type = ompt_work_loop; + } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { + ompt_work_type = ompt_work_sections; + } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { + ompt_work_type = ompt_work_distribute; + } else { + KMP_ASSERT2(0, + "__kmpc_for_static_init: can't determine workshare type"); + } + KMP_DEBUG_ASSERT(ompt_work_type); + } } #endif @@ -119,10 +139,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), 0, codeptr); } #endif KMP_COUNT_VALUE(FOR_static_iterations, 0); @@ -170,10 +191,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), *pstride, codeptr); } #endif return; @@ -198,10 +220,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), *pstride, codeptr); } #endif return; @@ -354,10 +377,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - 
ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), trip_count, codeptr); } #endif @@ -745,7 +769,12 @@ kmp_int32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! @@ -757,7 +786,12 @@ kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! @@ -768,7 +802,12 @@ kmp_int64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! @@ -780,7 +819,12 @@ kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! @} Index: runtime/src/kmp_settings.cpp =================================================================== --- runtime/src/kmp_settings.cpp +++ runtime/src/kmp_settings.cpp @@ -334,13 +334,11 @@ } } // __kmp_stg_parse_size -#if KMP_AFFINITY_SUPPORTED static void __kmp_stg_parse_str(char const *name, char const *value, char const **out) { __kmp_str_free(out); *out = __kmp_str_format("%s", value); } // __kmp_stg_parse_str -#endif static void __kmp_stg_parse_int( char const @@ -4354,7 +4352,29 @@ #endif -// ----------------------------------------------------------------------------- +#if OMP_50_ENABLED && OMPT_SUPPORT + +static void __kmp_stg_parse_omp_tool_libraries(char const *name, + char const *value, void *data) { + __kmp_stg_parse_str(name, value, &__kmp_tool_libraries); +} // __kmp_stg_parse_omp_tool_libraries + +static void __kmp_stg_print_omp_tool_libraries(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_tool_libraries) + __kmp_stg_print_str(buffer, name, __kmp_tool_libraries); + else { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); + } +} // __kmp_stg_print_omp_tool_libraries + +#endif + // Table. 
static kmp_setting_t __kmp_stg_table[] = { @@ -4598,6 +4618,12 @@ {"OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0}, #endif + +#if OMP_50_ENABLED && OMPT_SUPPORT + {"OMP_TOOL_LIBRARIES", __kmp_stg_parse_omp_tool_libraries, + __kmp_stg_print_omp_tool_libraries, NULL, 0, 0}, +#endif + {"", NULL, NULL, NULL, 0, 0}}; // settings static int const __kmp_stg_count = Index: runtime/src/kmp_taskdeps.cpp =================================================================== --- runtime/src/kmp_taskdeps.cpp +++ runtime/src/kmp_taskdeps.cpp @@ -16,6 +16,9 @@ #include "kmp.h" #include "kmp_io.h" #include "kmp_wait_release.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif #if OMP_40_ENABLED @@ -217,18 +220,19 @@ task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource); #endif -#if OMPT_SUPPORT && OMPT_TRACE - // OMPT tracks dependences between task (a=source, b=sink) in which - // task a blocks the execution of b through the ompt_new_dependence_callback - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + /* OMPT tracks dependences between task (a=source, b=sink) in which + task a blocks the execution of b through the ompt_new_dependence_callback + */ + if (ompt_enabled.ompt_callback_task_dependence) { kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task); kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task); - ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)( - task_source->ompt_task_info.task_id, task_sink->ompt_task_info.task_id); + ompt_callbacks.ompt_callback(ompt_callback_task_dependence)( + &(task_source->ompt_task_info.task_data), + &(task_sink->ompt_task_info.task_data)); } -#endif /* OMPT_SUPPORT && OMPT_TRACE */ +#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */ } template @@ -470,10 +474,29 @@ kmp_info_t *thread = __kmp_threads[gtid]; kmp_taskdata_t *current_task = thread->th.th_current_task; -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); + + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_task_create) { + kmp_taskdata_t *parent = new_taskdata->td_parent; + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? 
&(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1, + OMPT_LOAD_RETURN_ADDRESS(gtid)); + } + + new_taskdata->ompt_task_info.frame.reenter_runtime_frame = + OMPT_GET_FRAME_ADDRESS(0); + } + +#if OMPT_OPTIONAL /* OMPT grab all dependences if requested by the tool */ - if (ompt_enabled && ndeps + ndeps_noalias > 0 && - ompt_callbacks.ompt_callback(ompt_event_task_dependences)) { + if (ndeps + ndeps_noalias > 0 && + ompt_enabled.ompt_callback_task_dependences) { kmp_int32 i; new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias; @@ -509,8 +532,17 @@ new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags = ompt_task_dependence_type_in; } + ompt_callbacks.ompt_callback(ompt_callback_task_dependences)( + &(new_taskdata->ompt_task_info.task_data), + new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps); + /* We can now free the allocated memory for the dependencies */ + /* For OMPD we might want to delay the free until task_end */ + KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps); + new_taskdata->ompt_task_info.deps = NULL; + new_taskdata->ompt_task_info.ndeps = 0; } -#endif /* OMPT_SUPPORT && OMPT_TRACE */ +#endif /* OMPT_OPTIONAL */ +#endif /* OMPT_SUPPORT */ bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || @@ -557,7 +589,7 @@ "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref, new_taskdata)); - return __kmpc_omp_task(loc_ref, gtid, new_task); + return __kmp_omp_task(gtid, new_task, true); } /*! Index: runtime/src/kmp_tasking.cpp =================================================================== --- runtime/src/kmp_tasking.cpp +++ runtime/src/kmp_tasking.cpp @@ -446,31 +446,123 @@ KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata)); + return; +} + #if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_begin)( - parent ? parent->ompt_task_info.task_id : ompt_task_id_none, - parent ? &(parent->ompt_task_info.frame) : NULL, - taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function); - } -#endif -#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE - /* OMPT emit all dependences if requested by the tool */ - if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 && - ompt_callbacks.ompt_callback(ompt_event_task_dependences)) { - ompt_callbacks.ompt_callback(ompt_event_task_dependences)( - taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps, - taskdata->ompt_task_info.ndeps); - /* We can now free the allocated memory for the dependencies */ - KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps); - taskdata->ompt_task_info.deps = NULL; - taskdata->ompt_task_info.ndeps = 0; - } -#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ +//------------------------------------------------------------------------------ +// __ompt_task_init: +// Initialize OMPT fields maintained by a task. This will only be called after +// ompt_start_tool, so we already know whether ompt is enabled or not. + +static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) { + // The calls to __ompt_task_init already have the ompt_enabled condition. 
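+ // Reset the tool-visible task data word and both frame pointers; with OMP 4.0 enabled, the dependence bookkeeping is cleared as well.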
+ task->ompt_task_info.task_data.value = 0; + task->ompt_task_info.frame.exit_runtime_frame = NULL; + task->ompt_task_info.frame.reenter_runtime_frame = NULL; +#if OMP_40_ENABLED + task->ompt_task_info.ndeps = 0; + task->ompt_task_info.deps = NULL; +#endif /* OMP_40_ENABLED */ +} - return; +// __ompt_task_start: +// Build and trigger task-begin event +static inline void __ompt_task_start(kmp_task_t *task, + kmp_taskdata_t *current_task, + kmp_int32 gtid) { + // The calls to __ompt_task_start already have the ompt_enabled condition. + // if (__builtin_expect(ompt_enabled.enabled,0)){ + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + ompt_task_status_t status = ompt_task_others; + if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) { + status = ompt_task_yield; + __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0; + } + /* let OMPT know that we're about to run this task */ + if (ompt_enabled.ompt_callback_task_schedule) { + ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( + &(current_task->ompt_task_info.task_data), status, + &(taskdata->ompt_task_info.task_data)); + } + taskdata->ompt_task_info.scheduling_parent = current_task; + // } +} + +// __ompt_task_finish: +// Build and trigger task-end event +static inline void __ompt_task_finish(kmp_task_t *task, + kmp_taskdata_t *resumed_task) { + // The calls to __ompt_task_finish already have the ompt_enabled condition. + // if (__builtin_expect(ompt_enabled.enabled,0)){ + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + ompt_task_status_t status = ompt_task_complete; + if (taskdata->td_flags.tiedness == TASK_UNTIED && + KMP_TEST_THEN_ADD32(&(taskdata->td_untied_count), 0) > 1) + status = ompt_task_others; + if (__kmp_omp_cancellation && taskdata->td_taskgroup && + taskdata->td_taskgroup->cancel_request == cancel_taskgroup) { + status = ompt_task_cancel; + } + + /* let OMPT know that we're returning to the callee task */ + if (ompt_enabled.ompt_callback_task_schedule) { + ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( + &(taskdata->ompt_task_info.task_data), status, + &((resumed_task ? resumed_task + : (taskdata->ompt_task_info.scheduling_parent + ? taskdata->ompt_task_info.scheduling_parent + : taskdata->td_parent)) + ->ompt_task_info.task_data)); + } + // } +} +#endif + +#if OMPT_SUPPORT +OMPT_NOINLINE +static void __ompt_enabled_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task, void *frame_address, + void *return_address) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; + + KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p " + "current_task=%p\n", + gtid, loc_ref, taskdata, current_task)); + + if (taskdata->td_flags.tiedness == TASK_UNTIED) { + // untied task needs to increment counter so that the task structure is not + // freed prematurely + kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count); + KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) " + "incremented for task %p\n", + gtid, counter, taskdata)); + } + + taskdata->td_flags.task_serial = + 1; // Execute this task immediately, not deferred. 
+ + __kmp_task_start(gtid, task, current_task); + + if (current_task->ompt_task_info.frame.reenter_runtime_frame == NULL) { + current_task->ompt_task_info.frame.reenter_runtime_frame = + taskdata->ompt_task_info.frame.exit_runtime_frame = frame_address; + } + if (ompt_enabled.ompt_callback_task_create) { + ompt_task_info_t *parent_info = &(current_task->ompt_task_info); + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + &(parent_info->task_data), &(parent_info->frame), + &(taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0, + return_address); + } + __ompt_task_start(task, current_task, gtid); + + KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid, + loc_ref, taskdata)); } +#endif // OMPT_SUPPORT // __kmpc_omp_task_begin_if0: report that a given serialized task has started // execution @@ -480,6 +572,15 @@ // task: task thunk for the started task. void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) { +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); + if (UNLIKELY(ompt_enabled.enabled)) { + __ompt_enabled_task_begin_if0(loc_ref, gtid, task, + OMPT_GET_FRAME_ADDRESS(1), + OMPT_LOAD_RETURN_ADDRESS(gtid)); + return; + } +#endif kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; @@ -623,14 +724,6 @@ thread->th.th_task_team; // might be NULL for serial teams... kmp_int32 children = 0; -#if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_end)( - taskdata->ompt_task_info.task_id); - } -#endif - KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming " "task %p\n", gtid, taskdata, resumed_task)); @@ -760,6 +853,28 @@ return; } +#if OMPT_SUPPORT +OMPT_NOINLINE +static void __ompt_enabled_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { + KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); + + // this routine will provide task to resume + __kmp_task_finish(gtid, task, NULL); + + KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); + + __ompt_task_finish(task, NULL); + ompt_frame_t *ompt_frame; + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = NULL; + + return; +} +#endif + // __kmpc_omp_task_complete_if0: report that a task has completed execution // // loc_ref: source location information; points to end of task block. @@ -767,6 +882,12 @@ // task: task thunk for the completed task. void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) { +#if OMPT_SUPPORT + if (UNLIKELY(ompt_enabled.enabled)) { + __ompt_enabled_task_complete_if0(loc_ref, gtid, task); + return; + } +#endif KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); // this routine will provide task to resume @@ -793,25 +914,6 @@ } #endif // TASK_UNUSED -#if OMPT_SUPPORT -// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will -// only be called after ompt_tool, so we already know whether ompt is enabled -// or not. 
-static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid, - void *function) { - if (ompt_enabled) { - task->ompt_task_info.task_id = __ompt_task_id_new(tid); - task->ompt_task_info.function = function; - task->ompt_task_info.frame.exit_runtime_frame = NULL; - task->ompt_task_info.frame.reenter_runtime_frame = NULL; -#if OMP_40_ENABLED - task->ompt_task_info.ndeps = 0; - task->ompt_task_info.deps = NULL; -#endif /* OMP_40_ENABLED */ - } -} -#endif - // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit // task for a given thread // @@ -876,7 +978,8 @@ } #if OMPT_SUPPORT - __kmp_task_init_ompt(task, tid, NULL); + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_init(task, tid); #endif KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, @@ -1121,7 +1224,8 @@ ANNOTATE_HAPPENS_BEFORE(task); #if OMPT_SUPPORT - __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry); + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_init(taskdata, gtid); #endif return task; @@ -1207,7 +1311,7 @@ if (taskdata->td_flags.proxy != TASK_PROXY) { #endif ANNOTATE_HAPPENS_AFTER(task); - __kmp_task_start(gtid, task, current_task); + __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded #if OMP_45_ENABLED } #endif @@ -1215,14 +1319,16 @@ #if OMPT_SUPPORT ompt_thread_info_t oldInfo; kmp_info_t *thread; - if (ompt_enabled) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { // Store the threads states and restore them after the task thread = __kmp_threads[gtid]; oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; + thread->th.ompt_thread_info.state = (thread->th.th_team_serialized) + ? omp_state_work_serial + : omp_state_work_parallel; taskdata->ompt_task_info.frame.exit_runtime_frame = - __builtin_frame_address(0); + OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -1236,6 +1342,18 @@ kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_data_t *task_data; + if (__builtin_expect(ompt_enabled.ompt_callback_cancel, 0)) { + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, + ((taskgroup && taskgroup->cancel_request) ? 
ompt_cancel_taskgroup + : ompt_cancel_parallel) | + ompt_cancel_discarded_task, + NULL); + } +#endif KMP_COUNT_BLOCK(TASK_cancelled); // this task belongs to a task group and we need to cancel it discard = 1 /* true */; @@ -1270,13 +1388,10 @@ #endif // KMP_STATS_ENABLED #endif // OMP_40_ENABLED -#if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're about to run this task */ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - current_task->ompt_task_info.task_id, - taskdata->ompt_task_info.task_id); - } +// OMPT task begin +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_start(task, current_task, gtid); #endif #ifdef KMP_GOMP_COMPAT @@ -1289,21 +1404,16 @@ } KMP_POP_PARTITIONED_TIMER(); -#if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're returning to the callee task */ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - taskdata->ompt_task_info.task_id, - current_task->ompt_task_info.task_id); - } +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_finish(task, current_task); #endif - #if OMP_40_ENABLED } #endif // OMP_40_ENABLED #if OMPT_SUPPORT - if (ompt_enabled) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { thread->th.ompt_thread_info = oldInfo; taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; } @@ -1314,7 +1424,7 @@ if (taskdata->td_flags.proxy != TASK_PROXY) { #endif ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); - __kmp_task_finish(gtid, task, current_task); + __kmp_task_finish(gtid, task, current_task); // OMPT only if not discarded #if OMP_45_ENABLED } #endif @@ -1352,6 +1462,23 @@ KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); +#if OMPT_SUPPORT + kmp_taskdata_t *parent; + if (__builtin_expect(ompt_enabled.enabled, 0)) { + parent = new_taskdata->td_parent; + // parent->ompt_task_info.frame.reenter_runtime_frame = + // OMPT_GET_FRAME_ADDRESS(1); + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? &(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0, + OMPT_GET_RETURN_ADDRESS(0)); + } + } +#endif + /* Should we execute the new task or queue it? For now, let's just always try to queue it. If the queue fills up, then we'll execute it. */ @@ -1369,6 +1496,11 @@ gtid, loc_ref, new_taskdata)); ANNOTATE_HAPPENS_BEFORE(new_task); +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) { + parent->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +#endif return TASK_CURRENT_NOT_QUEUED; } @@ -1387,13 +1519,6 @@ bool serialize_immediate) { kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); -#if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(1); - } -#endif - /* Should we execute the new task or queue it? For now, let's just always try to queue it. If the queue fills up, then we'll execute it. 
*/ #if OMP_45_ENABLED @@ -1409,12 +1534,6 @@ __kmp_invoke_task(gtid, new_task, current_task); } -#if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; - } -#endif - ANNOTATE_HAPPENS_BEFORE(new_task); return TASK_CURRENT_NOT_QUEUED; } @@ -1436,23 +1555,51 @@ kmp_int32 res; KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); -#if KMP_DEBUG +#if KMP_DEBUG || OMPT_SUPPORT kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); #endif KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); + kmp_taskdata_t *parent = NULL; + if (__builtin_expect(ompt_enabled.enabled && !new_taskdata->td_flags.started, + 0)) { + parent = new_taskdata->td_parent; + if (!parent->ompt_task_info.frame.reenter_runtime_frame) + parent->ompt_task_info.frame.reenter_runtime_frame = + OMPT_GET_FRAME_ADDRESS(1); + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? &(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, + OMPT_LOAD_RETURN_ADDRESS(gtid)); + } + } +#endif + res = __kmp_omp_task(gtid, new_task, true); KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled && parent != NULL, 0)) { + parent->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +#endif return res; } -// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are -// complete -kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { +#if OMPT_SUPPORT && OMPT_OPTIONAL +OMPT_NOINLINE +static kmp_int32 __ompt_enabled_taskwait(ident_t *loc_ref, kmp_int32 gtid, + void *frame_address, + void *return_address) { kmp_taskdata_t *taskdata; kmp_info_t *thread; int thread_finished = FALSE; @@ -1463,23 +1610,26 @@ if (__kmp_tasking_mode != tskm_immediate_exec) { thread = __kmp_threads[gtid]; taskdata = thread->th.th_current_task; -#if OMPT_SUPPORT && OMPT_TRACE - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; - - if (ompt_enabled) { - kmp_team_t *team = thread->th.th_team; - my_task_id = taskdata->ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - - taskdata->ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(1); - if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id, - my_task_id); - } + + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + + my_task_data = &(taskdata->ompt_task_info.task_data); + my_parallel_data = OMPT_CUR_TEAM_DATA(thread); + + taskdata->ompt_task_info.frame.reenter_runtime_frame = frame_address; + + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); + } + + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); } -#endif // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
@@ -1522,15 +1672,90 @@ // negated. taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id, - my_task_id); - } - taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, + my_task_data, return_address); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, + my_task_data, return_address); } + taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; + + ANNOTATE_HAPPENS_AFTER(taskdata); + } + + KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " + "returning TASK_CURRENT_NOT_QUEUED\n", + gtid, taskdata)); + + return TASK_CURRENT_NOT_QUEUED; +} +#endif // OMPT_SUPPORT && OMPT_OPTIONAL + +// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are +// complete +kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (UNLIKELY(ompt_enabled.enabled)) { + return __ompt_enabled_taskwait(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1), + OMPT_GET_RETURN_ADDRESS(0)); + } #endif + kmp_taskdata_t *taskdata; + kmp_info_t *thread; + int thread_finished = FALSE; + KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); + + KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref)); + + if (__kmp_tasking_mode != tskm_immediate_exec) { + thread = __kmp_threads[gtid]; + taskdata = thread->th.th_current_task; + +// Debugger: The taskwait is active. Store location and thread encountered the +// taskwait. +#if USE_ITT_BUILD +// Note: These values are used by ITT events as well. +#endif /* USE_ITT_BUILD */ + taskdata->td_taskwait_counter += 1; + taskdata->td_taskwait_ident = loc_ref; + taskdata->td_taskwait_thread = gtid + 1; + +#if USE_ITT_BUILD + void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_starting(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ + + bool must_wait = + !taskdata->td_flags.team_serial && !taskdata->td_flags.final; + +#if OMP_45_ENABLED + must_wait = must_wait || (thread->th.th_task_team != NULL && + thread->th.th_task_team->tt.tt_found_proxy_tasks); +#endif + if (must_wait) { + kmp_flag_32 flag( + RCAST(volatile kmp_uint32 *, &taskdata->td_incomplete_child_tasks), + 0U); + while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) { + flag.execute_tasks(thread, gtid, FALSE, + &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), + __kmp_task_stealing_constraint); + } + } +#if USE_ITT_BUILD + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_finished(gtid, itt_sync_obj); +#endif /* USE_ITT_BUILD */ + + // Debugger: The taskwait is completed. Location remains, but thread is + // negated. 
+ taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; + ANNOTATE_HAPPENS_AFTER(taskdata); } @@ -1575,10 +1800,18 @@ kmp_task_team_t *task_team = thread->th.th_task_team; if (task_team != NULL) { if (KMP_TASKING_ENABLED(task_team)) { +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + thread->th.ompt_thread_info.ompt_task_yielded = 1; +#endif __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint); +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + thread->th.ompt_thread_info.ompt_task_yielded = 0; +#endif } } } @@ -1809,6 +2042,22 @@ tg_new->reduce_num_data = 0; #endif taskdata->td_taskgroup = tg_new; + +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region, 0)) { + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + kmp_team_t *team = thread->th.th_team; + ompt_data_t my_task_data = taskdata->ompt_task_info.task_data; + // FIXME: I think this is wrong for lwt! + ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data; + + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif } // __kmpc_end_taskgroup: Wait until all tasks generated by the current task @@ -1819,6 +2068,22 @@ kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; int thread_finished = FALSE; +#if OMPT_SUPPORT && OMPT_OPTIONAL + kmp_team_t *team; + ompt_data_t my_task_data; + ompt_data_t my_parallel_data; + void *codeptr; + if (__builtin_expect(ompt_enabled.enabled, 0)) { + team = thread->th.th_team; + my_task_data = taskdata->ompt_task_info.task_data; + // FIXME: I think this is wrong for lwt! 
+ my_parallel_data = team->t.ompt_team_info.parallel_data; + codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + } +#endif + KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc)); KMP_DEBUG_ASSERT(taskgroup != NULL); KMP_SET_THREAD_STATE_BLOCK(TASKGROUP); @@ -1832,6 +2097,14 @@ __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region_wait, 0)) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif + #if OMP_45_ENABLED if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && @@ -1848,6 +2121,14 @@ } } +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region_wait, 0)) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif + #if USE_ITT_BUILD if (itt_sync_obj != NULL) __kmp_itt_taskwait_finished(gtid, itt_sync_obj); @@ -1867,6 +2148,14 @@ KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata)); ANNOTATE_HAPPENS_AFTER(taskdata); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region, 0)) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif } #endif @@ -3255,8 +3544,8 @@ ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n", thread, taskdata, taskdata->td_parent)); #if OMPT_SUPPORT - __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, - (void *)task->routine); + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid); #endif return task; } @@ -3539,8 +3828,25 @@ "grain %llu(%d), dup %p\n", gtid, taskdata, *lb, *ub, st, grainsize, sched, task_dup)); - if (nogroup == 0) +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), + 0, // TODO: OMPT: verify loop count value (OpenMP-spec 4.6.2.18). ?? + // Should be 'tc' value below? 
+ OMPT_GET_RETURN_ADDRESS(0)); + } +#endif + + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_taskgroup(loc, gtid); + } // ========================================================================= // calculate loop parameters @@ -3614,6 +3920,9 @@ if (if_val == 0) { // if(0) specified, mark task as serial taskdata->td_flags.task_serial = 1; taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif // always start serial tasks linearly __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, task_dup); @@ -3621,18 +3930,38 @@ KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu" "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, num_tasks_min, task_dup); } else { KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu" "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, task_dup); } - if (nogroup == 0) + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_taskgroup(loc, gtid); + } +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), + &(task_info->task_data), + 0, // TODO: OMPT: verify loop count value (OpenMP-spec 4.6.2.18). ?? + // Should be 'tc' value below? + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); } Index: runtime/src/kmp_wait_release.h =================================================================== --- runtime/src/kmp_wait_release.h +++ runtime/src/kmp_wait_release.h @@ -17,6 +17,9 @@ #include "kmp.h" #include "kmp_itt.h" #include "kmp_stats.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif /*! 
@defgroup WAIT_RELEASE Wait/Release operations @@ -85,6 +88,44 @@ */ }; +#if OMPT_SUPPORT +static inline void __ompt_implicit_task_end(kmp_info_t *this_thr, + omp_state_t omp_state, + ompt_data_t *tId, + ompt_data_t *pId) { + int ds_tid = this_thr->th.th_info.ds.ds_tid; + if (omp_state == omp_state_wait_barrier_implicit) { + this_thr->th.ompt_thread_info.state = omp_state_overhead; +#if OMPT_OPTIONAL + void *codeptr = NULL; + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } +#endif + if (!KMP_MASTER_TID(ds_tid)) { + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, tId, 0, ds_tid); + } +#if OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_idle) { + ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin); + } +#endif + // return to idle state + this_thr->th.ompt_thread_info.state = omp_state_idle; + } else { + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } + } +} +#endif + /* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_* must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */ @@ -116,30 +157,88 @@ stats_state_e thread_state = KMP_GET_THREAD_STATE(); #endif -#if OMPT_SUPPORT && OMPT_BLAME - ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state; - if (ompt_enabled && ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) { - ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - +/* OMPT Behavior: +THIS function is called from + __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions) + these have join / fork behavior + + In these cases, we don't change the state or trigger events in THIS +function. 
+ Events are triggered in the calling code (__kmp_barrier): + + state := omp_state_overhead + barrier-begin + barrier-wait-begin + state := omp_state_wait_barrier + call join-barrier-implementation (finally arrive here) + {} + call fork-barrier-implementation (finally arrive here) + {} + state := omp_state_overhead + barrier-wait-end + barrier-end + state := omp_state_work_parallel + + + __kmp_fork_barrier (after thread creation, before executing implicit task) + call fork-barrier-implementation (finally arrive here) + {} // worker arrive here with state = omp_state_idle + + + __kmp_join_barrier (implicit barrier at end of parallel region) + state := omp_state_barrier_implicit + barrier-begin + barrier-wait-begin + call join-barrier-implementation (finally arrive here +final_spin=FALSE) + { + } + __kmp_fork_barrier (implicit barrier at end of parallel region) + call fork-barrier-implementation (finally arrive here final_spin=TRUE) + + Worker after task-team is finished: + barrier-wait-end + barrier-end + implicit-task-end + idle-begin + state := omp_state_idle + + Before leaving, if state = omp_state_idle + idle-end + state := omp_state_overhead +*/ +#if OMPT_SUPPORT + omp_state_t ompt_entry_state; + ompt_data_t *pId = NULL; + ompt_data_t *tId; + if (ompt_enabled.enabled) { + ompt_entry_state = this_thr->th.ompt_thread_info.state; + if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit || + KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) { ompt_lw_taskteam_t *team = this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; if (team) { - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; + pId = &(team->ompt_team_info.parallel_data); + tId = &(team->ompt_task_info.task_data); } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; + pId = OMPT_CUR_TEAM_DATA(this_thr); + tId = OMPT_CUR_TASK_DATA(this_thr); + } + } else { + pId = NULL; + tId = &(this_thr->th.ompt_thread_info.task_data); + } +#if OMPT_OPTIONAL + if (ompt_entry_state == omp_state_idle) { + if (ompt_enabled.ompt_callback_idle) { + ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin); } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId); + } else +#endif + if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec || + this_thr->th.th_task_team == NULL)) { + // implicit task is done. 
Either no taskqueue, or task-team finished + __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId); + } + } +#endif @@ -206,6 +305,11 @@ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; } else { KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); +#if OMPT_SUPPORT + // task-team is done now, other cases should be caught above + if (final_spin && ompt_enabled.enabled) + __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId); +#endif this_thr->th.th_task_team = NULL; this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; } @@ -293,29 +397,22 @@ // TODO: If thread is done with work and times out, disband/free } -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) { - ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - - ompt_lw_taskteam_t *team = - this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; - if (team) { - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; - } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; +#if OMPT_SUPPORT + omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state; + if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) { +#if OMPT_OPTIONAL + if (final_spin) { + __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId); + ompt_exit_state = this_thr->th.ompt_thread_info.state; + } +#endif + if (ompt_exit_state == omp_state_idle) { +#if OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_idle) { + ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end); } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId); +#endif + this_thr->th.ompt_thread_info.state = omp_state_overhead; } } #endif Index: runtime/src/ompt-event-specific.h =================================================================== --- runtime/src/ompt-event-specific.h +++ runtime/src/ompt-event-specific.h @@ -22,132 +22,85 @@ | the OMPT TR. They are exposed to tools through ompt_set_callback. 
+--------------------------------------------------------------------------*/ -#define ompt_event_NEVER ompt_set_result_event_never_occurs -#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback -#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some -#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always +//#define ompt_event_NEVER ompt_set_never +#define ompt_event_UNIMPLEMENTED ompt_set_never +#define ompt_event_MAY_CONVENIENT ompt_set_sometimes +#define ompt_event_MAY_ALWAYS ompt_set_always -#if OMPT_TRACE -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS +#if OMPT_OPTIONAL +#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_MAY_ALWAYS #else -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED -#endif - -#if OMPT_BLAME -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS -#else -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED +#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_UNIMPLEMENTED #endif /*---------------------------------------------------------------------------- | Mandatory Events +--------------------------------------------------------------------------*/ -#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_parallel_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_parallel_end_implemented ompt_event_MAY_ALWAYS -#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_task_create_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_task_schedule_implemented ompt_event_MAY_ALWAYS -#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_thread_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_thread_end_implemented ompt_event_MAY_ALWAYS -#define ompt_event_control_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS -#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS /*---------------------------------------------------------------------------- - | Optional Events (blame shifting) + | Target Related Events (not yet implemented) +--------------------------------------------------------------------------*/ -#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME - -#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_release_lock_implemented 
ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_nest_lock_last_implemented \ - ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED /*---------------------------------------------------------------------------- - | Optional Events (synchronous events) + | Optional Events (blame shifting) +--------------------------------------------------------------------------*/ -#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_idle_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_sync_region_wait_implemented \ + ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE +/*---------------------------------------------------------------------------- + | Optional Events (synchronous events) + +--------------------------------------------------------------------------*/ -#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_work_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_master_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_release_nest_lock_prev_implemented \ - ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define 
ompt_event_acquired_nest_lock_first_implemented \ - ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_nest_lock_next_implemented \ - ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_mutex_acquire_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_mutex_acquired_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_nest_lock_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_lock_init_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_lock_destroy_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_flush_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL #if OMP_40_ENABLED -#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_task_dependences_implemented \ + ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL #else -#define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED #endif /* OMP_40_ENABLED */ #endif Index: runtime/src/ompt-general.cpp =================================================================== --- runtime/src/ompt-general.cpp +++ runtime/src/ompt-general.cpp @@ -8,6 +8,10 @@ #include #include #include +#if KMP_OS_UNIX +#include +#include +#endif /***************************************************************************** * ompt include files @@ -36,8 +40,13 @@ typedef struct { const char *state_name; - ompt_state_t state_id; -} ompt_state_info_t; + omp_state_t state_id; +} omp_state_info_t; + +typedef struct { + const char *name; + ompt_mutex_impl_t id; +} ompt_mutex_impl_info_t; enum tool_setting_e { omp_tool_error, @@ -46,25 +55,32 @@ omp_tool_enabled }; -typedef void (*ompt_initialize_t)(ompt_function_lookup_t ompt_fn_lookup, - const char *version, - unsigned int ompt_version); +typedef int (*ompt_initialize_t)(ompt_function_lookup_t lookup, + struct ompt_fns_t *fns); + +typedef void (*ompt_finalize_t)(struct ompt_fns_t *fns); /***************************************************************************** * global variables ****************************************************************************/ -int ompt_enabled = 0; +ompt_callbacks_active_t ompt_enabled; + +omp_state_info_t omp_state_info[] = { +#define omp_state_macro(state, code) {#state, state}, + FOREACH_OMP_STATE(omp_state_macro) +#undef omp_state_macro +}; -ompt_state_info_t ompt_state_info[] = { -#define ompt_state_macro(state, code) {#state, state}, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro +ompt_mutex_impl_info_t ompt_mutex_impl_info[] = { 
+#define ompt_mutex_impl_macro(name, id) {#name, name}, + FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro) +#undef ompt_mutex_impl_macro }; -ompt_callbacks_t ompt_callbacks; +ompt_callbacks_internal_t ompt_callbacks; -static ompt_initialize_t ompt_initialize_fn = NULL; +static ompt_fns_t *ompt_fns = NULL; /***************************************************************************** * forward declarations @@ -72,48 +88,71 @@ static ompt_interface_fn_t ompt_fn_lookup(const char *s); -OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void); +OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void); /***************************************************************************** * initialization and finalization (private operations) ****************************************************************************/ /* On Unix-like systems that support weak symbols the following implementation - * of ompt_tool() will be used in case no tool-supplied implementation of + * of ompt_start_tool() will be used in case no tool-supplied implementation of * this function is present in the address space of a process. * * On Windows, the ompt_tool_windows function is used to find the * ompt_tool symbol across all modules loaded by a process. If ompt_tool is * found, ompt_tool's return value is used to initialize the tool. Otherwise, * NULL is returned and OMPT won't be enabled */ + +typedef ompt_fns_t *(*ompt_start_tool_t)(unsigned int, const char *); + +#if KMP_OS_UNIX + #if OMPT_HAVE_WEAK_ATTRIBUTE +_OMP_EXTERN __attribute__((weak)) +#elif defined KMP_DYNAMIC_LIB _OMP_EXTERN -__attribute__((weak)) ompt_initialize_t ompt_tool() { +#warning Activation of OMPT might fail for tools statically linked into the application. +#else +#error Activation of OMPT is not supported on this platform. 
+#endif +ompt_fns_t * +ompt_start_tool(unsigned int omp_version, const char *runtime_version) { +#ifdef KMP_DYNAMIC_LIB + ompt_fns_t *ret = NULL; + // Try next symbol in the address space + ompt_start_tool_t next_tool = NULL; + *(void **)(&next_tool) = dlsym(RTLD_NEXT, "ompt_start_tool"); + if (next_tool) + ret = (next_tool)(omp_version, runtime_version); + return ret; +#else #if OMPT_DEBUG - printf("ompt_tool() is called from the RTL\n"); + printf("ompt_start_tool() is called from the RTL\n"); #endif return NULL; +#endif } #elif OMPT_HAVE_PSAPI #include #pragma comment(lib, "psapi.lib") -#define ompt_tool ompt_tool_windows +#define ompt_start_tool ompt_tool_windows // The number of loaded modules to start enumeration with EnumProcessModules() #define NUM_MODULES 128 -static ompt_initialize_t ompt_tool_windows() { +static ompt_fns_t *ompt_tool_windows(unsigned int omp_version, + const char *runtime_version) { int i; DWORD needed, new_size; HMODULE *modules; HANDLE process = GetCurrentProcess(); modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE)); - ompt_initialize_t (*ompt_tool_p)() = NULL; + ompt_start_tool_t ompt_tool_p = NULL; #if OMPT_DEBUG - printf("ompt_tool_windows(): looking for ompt_tool\n"); + printf("ompt_tool_windows(): looking for ompt_start_tool\n"); #endif if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE), &needed)) { @@ -135,21 +174,22 @@ } } for (i = 0; i < new_size; ++i) { - (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool"); + (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool"); if (ompt_tool_p) { #if OMPT_DEBUG TCHAR modName[MAX_PATH]; if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool found in module %s\n", modName); + printf("ompt_tool_windows(): ompt_start_tool found in module %s\n", + modName); #endif free(modules); - return ompt_tool_p(); + return (*ompt_tool_p)(omp_version, runtime_version); } #if OMPT_DEBUG else { TCHAR modName[MAX_PATH]; if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool not found in module %s\n", + printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n", modName); } #endif @@ -161,6 +201,49 @@ #error Either __attribute__((weak)) or psapi.dll are required for OMPT support #endif // OMPT_HAVE_WEAK_ATTRIBUTE +static ompt_fns_t *ompt_try_start_tool(unsigned int omp_version, + const char *runtime_version) { + ompt_fns_t *ret = NULL; + ompt_start_tool_t start_tool = NULL; +#if KMP_OS_WINDOWS + // Cannot use colon to describe a list of absolute paths on Windows + const char *sep = ";"; +#else + const char *sep = ":"; +#endif + + // Try in the current address space + if ((ret = ompt_start_tool(omp_version, runtime_version))) + return ret; + + // Try tool-libraries-var ICV + const char *tool_libs = getenv("OMP_TOOL_LIBRARIES"); + if (tool_libs) { + const char *libs = __kmp_str_format("%s", tool_libs); + char *buf; + char *fname = __kmp_str_token(CCAST(char *, libs), sep, &buf); + while (fname) { +#if KMP_OS_UNIX + void *h = dlopen(fname, RTLD_LAZY); + if (h) { + *(void **)(&start_tool) = dlsym(h, "ompt_start_tool"); +#elif KMP_OS_WINDOWS + HMODULE h = LoadLibrary(fname); + if (h) { + start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool"); +#else +#error Activation of OMPT is not supported on this platform. 
+#endif + if (start_tool && (ret = (*start_tool)(omp_version, runtime_version))) + break; + } + fname = __kmp_str_token(NULL, sep, &buf); + } + __kmp_str_free(&libs); + } + return ret; +} + void ompt_pre_init() { //-------------------------------------------------- // Execute the pre-initialization logic only once. @@ -194,10 +277,14 @@ case omp_tool_unset: case omp_tool_enabled: - ompt_initialize_fn = ompt_tool(); - if (ompt_initialize_fn) { - ompt_enabled = 1; - } + + //-------------------------------------------------- + // Load tool iff specified in environment variable + //-------------------------------------------------- + ompt_fns = + ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version()); + + memset(&ompt_enabled, 0, sizeof(ompt_enabled)); break; case omp_tool_error: @@ -226,31 +313,34 @@ //-------------------------------------------------- // Initialize the tool if so indicated. //-------------------------------------------------- - if (ompt_enabled) { - ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(), - OMPT_VERSION); + if (ompt_fns) { + ompt_enabled.enabled = !!ompt_fns->initialize(ompt_fn_lookup, ompt_fns); ompt_thread_t *root_thread = ompt_get_thread(); - ompt_set_thread_state(root_thread, ompt_state_overhead); + ompt_set_thread_state(root_thread, omp_state_overhead); - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - ompt_callbacks.ompt_callback(ompt_event_thread_begin)( - ompt_thread_initial, ompt_get_thread_id()); + if (ompt_enabled.ompt_callback_thread_begin) { + ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( + ompt_thread_initial, __ompt_get_thread_data_internal()); + } + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); + if (ompt_enabled.ompt_callback_task_create) { + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + NULL, NULL, task_data, ompt_task_initial, 0, NULL); } - ompt_set_thread_state(root_thread, ompt_state_work_serial); + ompt_set_thread_state(root_thread, omp_state_work_serial); } } void ompt_fini() { - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) { - ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)(); - } + if (ompt_enabled.enabled) { + ompt_fns->finalize(ompt_fns); } - ompt_enabled = 0; + memset(&ompt_enabled, 0, sizeof(ompt_enabled)); } /***************************************************************************** @@ -261,15 +351,15 @@ * state ****************************************************************************/ -OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state, - const char **next_state_name) { - const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); +OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state, + const char **next_state_name) { + const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t); int i = 0; for (i = 0; i < len - 1; i++) { - if (ompt_state_info[i].state_id == current_state) { - *next_state = ompt_state_info[i + 1].state_id; - *next_state_name = ompt_state_info[i + 1].state_name; + if (omp_state_info[i].state_id == current_state) { + *next_state = omp_state_info[i + 1].state_id; + *next_state_name = omp_state_info[i + 1].state_name; return 1; } } @@ -277,17 +367,35 @@ return 0; } +OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl, + int *next_impl, + const char **next_impl_name) { + const static int len = + sizeof(ompt_mutex_impl_info) / sizeof(ompt_mutex_impl_info_t); + 
int i = 0; + for (i = 0; i < len - 1; i++) { + if (ompt_mutex_impl_info[i].id != current_impl) + continue; + *next_impl = ompt_mutex_impl_info[i + 1].id; + *next_impl_name = ompt_mutex_impl_info[i + 1].name; + return 1; + } + return 0; +} + /***************************************************************************** * callbacks ****************************************************************************/ -OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) { - switch (evid) { +OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which, + ompt_callback_t callback) { + switch (which) { #define ompt_event_macro(event_name, callback_type, event_id) \ case event_name: \ if (ompt_event_implementation_status(event_name)) { \ - ompt_callbacks.ompt_callback(event_name) = (callback_type)cb; \ + ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \ + ompt_enabled.event_name = 1; \ } \ return ompt_event_implementation_status(event_name); @@ -296,12 +404,13 @@ #undef ompt_event_macro default: - return ompt_set_result_registration_error; + return ompt_set_error; } } -OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) { - switch (evid) { +OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, + ompt_callback_t *callback) { + switch (which) { #define ompt_event_macro(event_name, callback_type, event_id) \ case event_name: \ @@ -309,7 +418,7 @@ ompt_callback_t mycb = \ (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ if (mycb) { \ - *cb = mycb; \ + *callback = mycb; \ return ompt_get_callback_success; \ } \ } \ @@ -328,54 +437,149 @@ * parallel regions ****************************************************************************/ -OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) { - return __ompt_get_parallel_id_internal(ancestor_level); -} - -OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) { - return __ompt_get_parallel_team_size_internal(ancestor_level); -} - -OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) { - return __ompt_get_parallel_function_internal(ancestor_level); +OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level, + ompt_data_t **parallel_data, + int *team_size) { + return __ompt_get_parallel_info_internal(ancestor_level, parallel_data, + team_size); } -OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) { - ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id); +OMPT_API_ROUTINE omp_state_t ompt_get_state(ompt_wait_id_t *wait_id) { + omp_state_t thread_state = __ompt_get_state_internal(wait_id); - if (thread_state == ompt_state_undefined) { - thread_state = ompt_state_work_serial; + if (thread_state == omp_state_undefined) { + thread_state = omp_state_work_serial; } return thread_state; } /***************************************************************************** - * threads + * tasks ****************************************************************************/ -OMPT_API_ROUTINE void *ompt_get_idle_frame() { - return __ompt_get_idle_frame_internal(); +OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) { + return __ompt_get_thread_data_internal(); +} + +OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num) { + return __ompt_get_task_info_internal(ancestor_level, type, task_data, + task_frame, parallel_data, thread_num); } 
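Note for readers of this patch (illustrative only, not part of the change): the activation protocol introduced above is easiest to follow next to a matching tool skeleton. The runtime resolves ompt_start_tool() either as a strong override of the weak default or via dlopen() of the libraries named in OMP_TOOL_LIBRARIES, calls the returned ompt_fns_t's initialize() with the ompt_fn_lookup function, and the tool then registers its callbacks through the looked-up ompt_set_callback entry point. In the sketch below the helper names (my_initialize, my_finalize, on_thread_begin) are invented for the example, and it assumes the installed ompt.h declares the referenced types (ompt_fns_t with initialize/finalize members, ompt_set_callback_t, ompt_callback_thread_begin) with the signatures used elsewhere in this diff.

/* tool.c -- minimal OMPT tool sketch (illustrative, assumes <ompt.h> declares
 * the OMPT types referenced below). */
#include <stdio.h>
#include <ompt.h>

static ompt_set_callback_t my_set_callback; /* resolved during initialize() */

/* Signature mirrors the runtime-side invocation in ompt_post_init():
 * callback(ompt_thread_type_t, ompt_data_t *). */
static void on_thread_begin(ompt_thread_type_t thread_type,
                            ompt_data_t *thread_data) {
  (void)thread_data;
  printf("thread_begin: type=%d\n", (int)thread_type);
}

static int my_initialize(ompt_function_lookup_t lookup, ompt_fns_t *fns) {
  (void)fns;
  /* Obtain runtime entry points through the lookup interface, then register
   * the callbacks this tool is interested in. */
  my_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
  if (!my_set_callback)
    return 0; /* a zero return leaves OMPT disabled, see ompt_post_init */
  my_set_callback(ompt_callback_thread_begin,
                  (ompt_callback_t)&on_thread_begin);
  return 1; /* a non-zero return activates OMPT */
}

static void my_finalize(ompt_fns_t *fns) {
  (void)fns;
  printf("tool finalized\n");
}

/* Found by the runtime either as an override of the weak default or via
 * dlopen() of a library listed in OMP_TOOL_LIBRARIES
 * (see ompt_try_start_tool above). */
ompt_fns_t *ompt_start_tool(unsigned int omp_version,
                            const char *runtime_version) {
  (void)omp_version;
  (void)runtime_version;
  static ompt_fns_t fns = {.initialize = &my_initialize,
                           .finalize = &my_finalize};
  return &fns;
}

Built as a shared object (essentially what the %libomp-tool substitution added to lit.cfg later in this patch does), such a library can be activated either by linking it with the application or by listing it in OMP_TOOL_LIBRARIES.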
/***************************************************************************** - * tasks + * places ****************************************************************************/ -OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) { - return __ompt_get_thread_id_internal(); +OMPT_API_ROUTINE int ompt_get_num_places(void) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return 0; +#else + if (!KMP_AFFINITY_CAPABLE()) + return 0; + return __kmp_affinity_num_masks; +#endif +} + +OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size, + int *ids) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return 0; +#else + int i, count; + int tmp_ids[ids_size]; + if (!KMP_AFFINITY_CAPABLE()) + return 0; + if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) + return 0; + /* TODO: Is this safe for asynchronous call from signal handler during runtime + * shutdown? */ + kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); + count = 0; + KMP_CPU_SET_ITERATE(i, mask) { + if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || + (!KMP_CPU_ISSET(i, mask))) { + continue; + } + if (count < ids_size) + tmp_ids[count] = i; + count++; + } + if (ids_size >= count) { + for (i = 0; i < count; i++) { + ids[i] = tmp_ids[i]; + } + } + return count; +#endif } -OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) { - return __ompt_get_task_id_internal(depth); +OMPT_API_ROUTINE int ompt_get_place_num(void) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return -1; +#else + int gtid; + kmp_info_t *thread; + if (!KMP_AFFINITY_CAPABLE()) + return -1; + gtid = __kmp_entry_gtid(); + thread = __kmp_thread_from_gtid(gtid); + if (thread == NULL || thread->th.th_current_place < 0) + return -1; + return thread->th.th_current_place; +#endif } -OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) { - return __ompt_get_task_frame_internal(depth); +OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size, + int *place_nums) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return 0; +#else + int i, gtid, place_num, first_place, last_place, start, end; + kmp_info_t *thread; + if (!KMP_AFFINITY_CAPABLE()) + return 0; + gtid = __kmp_entry_gtid(); + thread = __kmp_thread_from_gtid(gtid); + if (thread == NULL) + return 0; + first_place = thread->th.th_first_place; + last_place = thread->th.th_last_place; + if (first_place < 0 || last_place < 0) + return 0; + if (first_place <= last_place) { + start = first_place; + end = last_place; + } else { + start = last_place; + end = first_place; + } + if (end - start <= place_nums_size) + for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) { + place_nums[i] = place_num; + } + return end - start; +#endif } -OMPT_API_ROUTINE void *ompt_get_task_function(int depth) { - return __ompt_get_task_function_internal(depth); +/***************************************************************************** + * places + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_get_proc_id(void) { +#if KMP_OS_LINUX + return sched_getcpu(); +#else + return -1; +#endif } /***************************************************************************** @@ -435,28 +639,59 @@ OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; } /***************************************************************************** - * application-facing API +* 
application-facing API ****************************************************************************/ /*---------------------------------------------------------------------------- | control ---------------------------------------------------------------------------*/ -_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) { - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) { - ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier); +int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) { + + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_control_tool) { + return ompt_callbacks.ompt_callback(ompt_callback_control_tool)( + command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid())); + } else { + return -1; + } + } else { + return -2; } } /***************************************************************************** + * misc + ****************************************************************************/ + +OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) { + return __ompt_get_unique_id_internal(); +} + +/***************************************************************************** + * Target + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id) { + return 0; // thread is not in a target region +} + +OMPT_API_ROUTINE int ompt_get_num_devices(void) { + return 1; // only one device (the current device) is available +} + +/***************************************************************************** * API inquiry for tool ****************************************************************************/ static ompt_interface_fn_t ompt_fn_lookup(const char *s) { #define ompt_interface_fn(fn) \ + fn##_t fn##_f = fn; \ if (strcmp(s, #fn) == 0) \ - return (ompt_interface_fn_t)fn; + return (ompt_interface_fn_t)fn##_f; FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) Index: runtime/src/ompt-internal.h =================================================================== --- runtime/src/ompt-internal.h +++ runtime/src/ompt-internal.h @@ -13,19 +13,39 @@ #define ompt_callback(e) e##_callback -typedef struct ompt_callbacks_s { +typedef struct ompt_callbacks_internal_s { #define ompt_event_macro(event, callback, eventid) \ callback ompt_callback(event); FOREACH_OMPT_EVENT(ompt_event_macro) #undef ompt_event_macro -} ompt_callbacks_t; +} ompt_callbacks_internal_t; + +typedef struct ompt_callbacks_active_s { + unsigned int enabled : 1; +#define ompt_event_macro(event, callback, eventid) unsigned int event : 1; + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_callbacks_active_t; + +typedef struct kmp_taskdata kmp_taskdata_t; + +#define TASK_TYPE_DETAILS_FORMAT(info) \ + ((info->td_flags.task_serial || info->td_flags.tasking_ser) \ + ? ompt_task_undeferred \ + : 0x0) | \ + ((!(info->td_flags.tiedness)) ? ompt_task_untied : 0x0) | \ + (info->td_flags.final ? ompt_task_final : 0x0) | \ + (info->td_flags.merged_if0 ? ompt_task_mergeable : 0x0) /* | \*/ +/*(info->td_flags.mergeable ? 
ompt_task_merged : 0x0) */ typedef struct { ompt_frame_t frame; - void *function; - ompt_task_id_t task_id; + ompt_data_t task_data; + kmp_taskdata_t *scheduling_parent; #if OMP_40_ENABLED int ndeps; ompt_task_dependence_t *deps; @@ -33,32 +53,31 @@ } ompt_task_info_t; typedef struct { - ompt_parallel_id_t parallel_id; - void *microtask; + ompt_data_t parallel_data; + void *master_return_address; } ompt_team_info_t; typedef struct ompt_lw_taskteam_s { ompt_team_info_t ompt_team_info; ompt_task_info_t ompt_task_info; + int heap; struct ompt_lw_taskteam_s *parent; } ompt_lw_taskteam_t; -typedef struct ompt_parallel_info_s { - ompt_task_id_t parent_task_id; /* id of parent task */ - ompt_parallel_id_t parallel_id; /* id of parallel region */ - ompt_frame_t *parent_task_frame; /* frame data of parent task */ - void *parallel_function; /* pointer to outlined function */ -} ompt_parallel_info_t; - typedef struct { - ompt_state_t state; + ompt_data_t thread_data; + ompt_data_t task_data; /* stored here from implicit barrier-begin until + implicit-task-end */ + void *return_address; /* stored here on entry of runtime */ + omp_state_t state; ompt_wait_id_t wait_id; + int ompt_task_yielded; void *idle_frame; } ompt_thread_info_t; -extern ompt_callbacks_t ompt_callbacks; +extern ompt_callbacks_internal_t ompt_callbacks; -#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE +#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL #if USE_FAST_MEMORY #define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate #define KMP_OMPT_DEPS_FREE __kmp_fast_free @@ -66,7 +85,7 @@ #define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc #define KMP_OMPT_DEPS_FREE __kmp_thread_free #endif -#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ +#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL */ #ifdef __cplusplus extern "C" { @@ -76,7 +95,20 @@ void ompt_post_init(void); void ompt_fini(void); -extern int ompt_enabled; +#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) +#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) + +int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg); + +extern ompt_callbacks_active_t ompt_enabled; + +#if KMP_OS_WINDOWS +#define UNLIKELY(x) (x) +#define OMPT_NOINLINE __declspec(noinline) +#else +#define UNLIKELY(x) __builtin_expect(!!(x), 0) +#define OMPT_NOINLINE __attribute__((noinline)) +#endif #ifdef __cplusplus }; Index: runtime/src/ompt-specific.h =================================================================== --- runtime/src/ompt-specific.h +++ runtime/src/ompt-specific.h @@ -13,42 +13,64 @@ * forward declarations ****************************************************************************/ -void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid); +void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid); void __ompt_thread_assign_wait_id(void *variable); void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr, - int gtid, void *microtask, - ompt_parallel_id_t ompt_pid); + int gtid, ompt_data_t *ompt_pid, void *codeptr); -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr); +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr, + int on_heap); -ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(ompt_thread_t *thr); - -ompt_parallel_id_t __ompt_parallel_id_new(int gtid); -ompt_task_id_t __ompt_task_id_new(int gtid); +// ompt_lw_taskteam_t * +void __ompt_lw_taskteam_unlink(ompt_thread_t *thr); ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); 
-ompt_task_info_t *__ompt_get_taskinfo(int depth); - -void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid); +ompt_task_info_t *__ompt_get_task_info_object(int depth); -void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid); +int __ompt_get_parallel_info_internal(int ancestor_level, + ompt_data_t **parallel_data, + int *team_size); -int __ompt_get_parallel_team_size_internal(int ancestor_level); +int __ompt_get_task_info_internal(int ancestor_level, int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, int *thread_num); -ompt_task_id_t __ompt_get_task_id_internal(int depth); +ompt_data_t *__ompt_get_thread_data_internal(); -ompt_frame_t *__ompt_get_task_frame_internal(int depth); +static uint64_t __ompt_get_unique_id_internal(); /***************************************************************************** * macros ****************************************************************************/ +#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info)) +#define OMPT_CUR_TASK_DATA(thr) \ + (&(thr->th.th_current_task->ompt_task_info.task_data)) +#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info)) +#define OMPT_CUR_TEAM_DATA(thr) \ + (&(thr->th.th_team->t.ompt_team_info.parallel_data)) + #define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) +inline void *__ompt_load_return_address(int gtid) { + kmp_info_t *thr = __kmp_threads[gtid]; + void *return_address = thr->th.ompt_thread_info.return_address; + thr->th.ompt_thread_info.return_address = NULL; + return return_address; +} + +#define OMPT_STORE_RETURN_ADDRESS(gtid) \ + if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ + !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ + __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ + __builtin_return_address(0) +#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) + //****************************************************************************** // inline functions //****************************************************************************** @@ -62,7 +84,7 @@ return ompt_get_thread_gtid(gtid); } -inline void ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) { +inline void ompt_set_thread_state(ompt_thread_t *thread, omp_state_t state) { thread->th.ompt_thread_info.state = state; } Index: runtime/src/ompt-specific.cpp =================================================================== --- runtime/src/ompt-specific.cpp +++ runtime/src/ompt-specific.cpp @@ -6,39 +6,32 @@ #include "ompt-internal.h" #include "ompt-specific.h" +#if KMP_OS_UNIX +#include +#include +#endif + +#if KMP_OS_WINDOWS +#define THREAD_LOCAL __declspec(thread) +#else +#define THREAD_LOCAL __thread +#endif + //****************************************************************************** // macros //****************************************************************************** -#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0) - -#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info; +#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info #define OMPT_THREAD_ID_BITS 16 -// 2013 08 24 - John Mellor-Crummey -// ideally, a thread should assign its own ids based on thread private data.
-// however, the way the intel runtime reinitializes thread data structures -// when it creates teams makes it difficult to maintain persistent thread -// data. using a shared variable instead is simple. I leave it to intel to -// sort out how to implement a higher performance version in their runtime. - -// when using fetch_and_add to generate the IDs, there isn't any reason to waste -// bits for thread id. -#if 0 -#define NEXT_ID(id_ptr, tid) \ - ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid)) -#else -#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr)) -#endif - //****************************************************************************** // private operations //****************************************************************************** //---------------------------------------------------------- // traverse the team and task hierarchy -// note: __ompt_get_teaminfo and __ompt_get_taskinfo +// note: __ompt_get_teaminfo and __ompt_get_task_info_object // traverse the hierarchy similarly and need to be // kept consistent //---------------------------------------------------------- @@ -51,7 +44,7 @@ if (team == NULL) return NULL; - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team); + ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL; while (depth > 0) { // next lightweight team (if any) @@ -61,9 +54,14 @@ // next heavyweight team (if any) after // lightweight teams are exhausted if (!lwt && team) { - team = team->t.t_parent; - if (team) { - lwt = LWT_FROM_TEAM(team); + if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + team = team->t.t_parent; + if (team) { + next_lwt = LWT_FROM_TEAM(team); + } } } @@ -90,13 +88,14 @@ return NULL; } -ompt_task_info_t *__ompt_get_taskinfo(int depth) { +ompt_task_info_t *__ompt_get_task_info_object(int depth) { ompt_task_info_t *info = NULL; kmp_info_t *thr = ompt_get_thread(); if (thr) { kmp_taskdata_t *taskdata = thr->th.th_current_task; - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team); + ompt_lw_taskteam_t *lwt = NULL, + *next_lwt = LWT_FROM_TEAM(taskdata->td_team); while (depth > 0) { // next lightweight team (if any) @@ -106,9 +105,59 @@ // next heavyweight team (if any) after // lightweight teams are exhausted if (!lwt && taskdata) { - taskdata = taskdata->td_parent; - if (taskdata) { - lwt = LWT_FROM_TEAM(taskdata->td_team); + if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + taskdata = taskdata->td_parent; + if (taskdata) { + next_lwt = LWT_FROM_TEAM(taskdata->td_team); + } + } + } + depth--; + } + + if (lwt) { + info = &lwt->ompt_task_info; + } else if (taskdata) { + info = &taskdata->ompt_task_info; + } + } + + return info; +} + +ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) { + ompt_task_info_t *info = NULL; + kmp_info_t *thr = ompt_get_thread(); + + if (thr) { + kmp_taskdata_t *taskdata = thr->th.th_current_task; + + ompt_lw_taskteam_t *lwt = NULL, + *next_lwt = LWT_FROM_TEAM(taskdata->td_team); + + while (depth > 0) { + // next lightweight team (if any) + if (lwt) + lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && taskdata) { + // first try scheduling parent (for explicit task scheduling) + if (taskdata->ompt_task_info.scheduling_parent) { + taskdata = taskdata->ompt_task_info.scheduling_parent; + } else if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + // then go for implicit tasks + taskdata = taskdata->td_parent; + if (taskdata) { + next_lwt = 
LWT_FROM_TEAM(taskdata->td_team); + } } } depth--; @@ -132,29 +181,14 @@ // thread support //---------------------------------------------------------- -ompt_parallel_id_t __ompt_thread_id_new() { - static uint64_t ompt_thread_id = 1; - return NEXT_ID(&ompt_thread_id, 0); -} - -void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) { - ompt_callbacks.ompt_callback(ompt_event_thread_begin)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); -} - -void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) { - ompt_callbacks.ompt_callback(ompt_event_thread_end)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); -} - -ompt_thread_id_t __ompt_get_thread_id_internal() { - // FIXME: until we have a better way of assigning ids, use __kmp_get_gtid - // since the return value might be negative, we need to test that before - // assigning it to an ompt_thread_id_t, which is unsigned. - int id = __kmp_get_gtid(); - assert(id >= 0); - - return GTID_TO_OMPT_THREAD_ID(id); +ompt_data_t *__ompt_get_thread_data_internal() { + if (__kmp_get_gtid() >= 0) { + kmp_info_t *thread = ompt_get_thread(); + if (thread == NULL) + return NULL; + return &(thread->th.ompt_thread_info.thread_data); + } + return NULL; } //---------------------------------------------------------- @@ -162,13 +196,12 @@ //---------------------------------------------------------- void __ompt_thread_assign_wait_id(void *variable) { - int gtid = __kmp_gtid_get_specific(); - kmp_info_t *ti = ompt_get_thread_gtid(gtid); + kmp_info_t *ti = ompt_get_thread(); ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable; } -ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) { +omp_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) { kmp_info_t *ti = ompt_get_thread(); if (ti) { @@ -176,46 +209,26 @@ *ompt_wait_id = ti->th.ompt_thread_info.wait_id; return ti->th.ompt_thread_info.state; } - return ompt_state_undefined; -} - -//---------------------------------------------------------- -// idle frame support -//---------------------------------------------------------- - -void *__ompt_get_idle_frame_internal(void) { - kmp_info_t *ti = ompt_get_thread(); - return ti ? ti->th.ompt_thread_info.idle_frame : NULL; + return omp_state_undefined; } //---------------------------------------------------------- // parallel region support //---------------------------------------------------------- -ompt_parallel_id_t __ompt_parallel_id_new(int gtid) { - static uint64_t ompt_parallel_id = 1; - return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0; -} - -void *__ompt_get_parallel_function_internal(int depth) { - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - void *function = info ? info->microtask : NULL; - return function; -} - -ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) { - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - ompt_parallel_id_t id = info ? info->parallel_id : 0; - return id; -} - -int __ompt_get_parallel_team_size_internal(int depth) { - // initialize the return value with the error value. - // if there is a team at the specified depth, the default - // value will be overwritten the size of that team. 
- int size = -1; - (void)__ompt_get_teaminfo(depth, &size); - return size; +int __ompt_get_parallel_info_internal(int ancestor_level, + ompt_data_t **parallel_data, + int *team_size) { + ompt_team_info_t *info; + if (team_size) { + info = __ompt_get_teaminfo(ancestor_level, team_size); + } else { + info = __ompt_get_teaminfo(ancestor_level, NULL); + } + if (parallel_data) { + *parallel_data = info ? &(info->parallel_data) : NULL; + } + return info ? 2 : 0; } //---------------------------------------------------------- @@ -223,60 +236,182 @@ //---------------------------------------------------------- void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, - void *microtask, ompt_parallel_id_t ompt_pid) { - lwt->ompt_team_info.parallel_id = ompt_pid; - lwt->ompt_team_info.microtask = microtask; - lwt->ompt_task_info.task_id = 0; + ompt_data_t *ompt_pid, void *codeptr) { + // initialize parallel_data with input, return address to parallel_data on + // exit + lwt->ompt_team_info.parallel_data = *ompt_pid; + lwt->ompt_team_info.master_return_address = codeptr; + lwt->ompt_task_info.task_data.value = 0; lwt->ompt_task_info.frame.reenter_runtime_frame = NULL; lwt->ompt_task_info.frame.exit_runtime_frame = NULL; - lwt->ompt_task_info.function = NULL; + lwt->ompt_task_info.scheduling_parent = NULL; + lwt->ompt_task_info.deps = NULL; + lwt->ompt_task_info.ndeps = 0; + lwt->heap = 0; lwt->parent = 0; } -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) { - ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info; - lwt->parent = my_parent; - thr->th.th_team->t.ompt_serialized_team_info = lwt; +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, + int on_heap) { + ompt_lw_taskteam_t *link_lwt = lwt; + if (thr->th.th_team->t.t_serialized > + 1) { // we already have a team, so link the new team and swap values + if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap + link_lwt = + (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); + } + link_lwt->heap = on_heap; + + // would be swap in the (on_stack) case. 
+ ompt_team_info_t tmp_team = lwt->ompt_team_info; + link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr); + *OMPT_CUR_TEAM_INFO(thr) = tmp_team; + + ompt_task_info_t tmp_task = lwt->ompt_task_info; + link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr); + *OMPT_CUR_TASK_INFO(thr) = tmp_task; + + // link the taskteam into the list of taskteams: + ompt_lw_taskteam_t *my_parent = + thr->th.th_team->t.ompt_serialized_team_info; + link_lwt->parent = my_parent; + thr->th.th_team->t.ompt_serialized_team_info = link_lwt; + } else { + // this is the first serialized team, so we just store the values in the + // team and drop the taskteam-object + *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info; + *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info; + } } -ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) { +void __ompt_lw_taskteam_unlink(kmp_info_t *thr) { ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; - if (lwtask) + if (lwtask) { thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; - return lwtask; + + ompt_team_info_t tmp_team = lwtask->ompt_team_info; + lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr); + *OMPT_CUR_TEAM_INFO(thr) = tmp_team; + + ompt_task_info_t tmp_task = lwtask->ompt_task_info; + lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr); + *OMPT_CUR_TASK_INFO(thr) = tmp_task; + + if (lwtask->heap) { + __kmp_free(lwtask); + lwtask = NULL; + } + } + // return lwtask; } //---------------------------------------------------------- // task support //---------------------------------------------------------- -ompt_task_id_t __ompt_task_id_new(int gtid) { - static uint64_t ompt_task_id = 1; - return NEXT_ID(&ompt_task_id, gtid); -} +int __ompt_get_task_info_internal(int ancestor_level, int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num) { + if (ancestor_level < 0) + return 0; -ompt_task_id_t __ompt_get_task_id_internal(int depth) { - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_task_id_t task_id = info ? info->task_id : 0; - return task_id; -} + // copied from __ompt_get_scheduling_taskinfo + ompt_task_info_t *info = NULL; + ompt_team_info_t *team_info = NULL; + kmp_info_t *thr = ompt_get_thread(); -void *__ompt_get_task_function_internal(int depth) { - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - void *function = info ? info->function : NULL; - return function; -} + if (thr) { + kmp_taskdata_t *taskdata = thr->th.th_current_task; + if (taskdata == NULL) + return 0; + kmp_team *team = thr->th.th_team; + if (team == NULL) + return 0; + ompt_lw_taskteam_t *lwt = NULL, + *next_lwt = LWT_FROM_TEAM(taskdata->td_team); + + while (ancestor_level > 0) { + // next lightweight team (if any) + if (lwt) + lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && taskdata) { + // first try scheduling parent (for explicit task scheduling) + if (taskdata->ompt_task_info.scheduling_parent) { + taskdata = taskdata->ompt_task_info.scheduling_parent; + } else if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + // then go for implicit tasks + taskdata = taskdata->td_parent; + if (team == NULL) + return 0; + team = team->t.t_parent; + if (taskdata) { + next_lwt = LWT_FROM_TEAM(taskdata->td_team); + } + } + } + ancestor_level--; + } -ompt_frame_t *__ompt_get_task_frame_internal(int depth) { - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_frame_t *frame = info ? 
frame = &info->frame : NULL; - return frame; + if (lwt) { + info = &lwt->ompt_task_info; + team_info = &lwt->ompt_team_info; + if (type) { + *type = ompt_task_implicit; + } + } else if (taskdata) { + info = &taskdata->ompt_task_info; + team_info = &team->t.ompt_team_info; + if (type) { + if (taskdata->td_parent) { + *type = (taskdata->td_flags.tasktype ? ompt_task_explicit + : ompt_task_implicit) | + TASK_TYPE_DETAILS_FORMAT(taskdata); + } else { + *type = ompt_task_initial; + } + } + } + if (task_data) { + *task_data = info ? &info->task_data : NULL; + } + if (task_frame) { + // OpenMP spec asks for the scheduling task to be returned. + *task_frame = info ? &info->frame : NULL; + } + if (parallel_data) { + *parallel_data = team_info ? &(team_info->parallel_data) : NULL; + } + return info ? 2 : 0; + } + return 0; } //---------------------------------------------------------- // team support //---------------------------------------------------------- -void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) { - team->t.ompt_team_info.parallel_id = ompt_pid; +void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) { + team->t.ompt_team_info.parallel_data = ompt_pid; +} + +//---------------------------------------------------------- +// misc +//---------------------------------------------------------- + +static uint64_t __ompt_get_unique_id_internal() { + static uint64_t thread = 1; + static THREAD_LOCAL uint64_t ID = 0; + if (ID == 0) { + uint64_t new_thread = KMP_TEST_THEN_INC64((kmp_int64 *)&thread); + ID = new_thread << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS); + } + return ++ID; } Index: runtime/src/z_Linux_util.cpp =================================================================== --- runtime/src/z_Linux_util.cpp +++ runtime/src/z_Linux_util.cpp @@ -2280,7 +2280,7 @@ #endif ) { #if OMPT_SUPPORT - *exit_frame_ptr = __builtin_frame_address(0); + *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); #endif switch (argc) { Index: runtime/test/CMakeLists.txt =================================================================== --- runtime/test/CMakeLists.txt +++ runtime/test/CMakeLists.txt @@ -34,8 +34,7 @@ pythonize_bool(LIBOMP_USE_HWLOC) pythonize_bool(LIBOMP_OMPT_SUPPORT) -pythonize_bool(LIBOMP_OMPT_BLAME) -pythonize_bool(LIBOMP_OMPT_TRACE) +pythonize_bool(LIBOMP_OMPT_OPTIONAL) pythonize_bool(LIBOMP_HAVE_LIBM) pythonize_bool(LIBOMP_HAVE_LIBATOMIC) Index: runtime/test/lit.cfg =================================================================== --- runtime/test/lit.cfg +++ runtime/test/lit.cfg @@ -108,11 +108,8 @@ if 'INTEL_LICENSE_FILE' in os.environ: config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE'] -# substitutions -if config.has_ompt: - config.substitutions.append(("FileCheck", config.test_filecheck)) - config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable")) +# substitutions config.substitutions.append(("%libomp-compile-and-run", \ "%libomp-compile && %libomp-run")) config.substitutions.append(("%libomp-cxx-compile-and-run", \ @@ -121,9 +118,14 @@ "%clangXX %cflags -std=c++11 %s -o %t" + libs)) config.substitutions.append(("%libomp-compile", \ "%clang %cflags %s -o %t" + libs)) +config.substitutions.append(("%libomp-tool", \ + "%clang %cflags -shared -fPIC -o %T/tool.so" + libs)) config.substitutions.append(("%libomp-run", "%t")) config.substitutions.append(("%clangXX", config.test_cxx_compiler)) config.substitutions.append(("%clang", config.test_compiler)) config.substitutions.append(("%openmp_flag", 
config.test_openmp_flag)) config.substitutions.append(("%cflags", config.test_cflags)) +if config.has_ompt: + config.substitutions.append(("FileCheck", config.test_filecheck)) + config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable")) Index: runtime/test/lit.site.cfg.in =================================================================== --- runtime/test/lit.site.cfg.in +++ runtime/test/lit.site.cfg.in @@ -11,7 +11,7 @@ config.operating_system = "@CMAKE_SYSTEM_NAME@" config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@" config.using_hwloc = @LIBOMP_USE_HWLOC@ -config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_BLAME@ and @LIBOMP_OMPT_TRACE@ +config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@ config.has_libm = @LIBOMP_HAVE_LIBM@ config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@ Index: runtime/test/ompt/callback.h =================================================================== --- runtime/test/ompt/callback.h +++ runtime/test/ompt/callback.h @@ -1,119 +1,715 @@ +#define _BSD_SOURCE #include #include +#include #include +//#include "kmp.h" +#include +#ifdef OMPT_USE_LIBUNWIND +#define UNW_LOCAL_ONLY +#include +#endif +#include "ompt-signal.h" -static ompt_get_task_id_t ompt_get_task_id; -static ompt_get_task_frame_t ompt_get_task_frame; -static ompt_get_thread_id_t ompt_get_thread_id; -static ompt_get_parallel_id_t ompt_get_parallel_id; +static const char* ompt_thread_type_t_values[] = { + NULL, + "ompt_thread_initial", + "ompt_thread_worker", + "ompt_thread_other" +}; + +static const char* ompt_task_status_t_values[] = { + NULL, + "ompt_task_complete", + "ompt_task_yield", + "ompt_task_cancel", + "ompt_task_others" +}; +static const char* ompt_cancel_flag_t_values[] = { + "ompt_cancel_parallel", + "ompt_cancel_sections", + "ompt_cancel_do", + "ompt_cancel_taskgroup", + "ompt_cancel_activated", + "ompt_cancel_detected", + "ompt_cancel_discarded_task" +}; + +static ompt_set_callback_t ompt_set_callback; +static ompt_get_task_info_t ompt_get_task_info; +static ompt_get_thread_data_t ompt_get_thread_data; +static ompt_get_parallel_info_t ompt_get_parallel_info; +static ompt_get_unique_id_t ompt_get_unique_id; +static ompt_get_num_places_t ompt_get_num_places; +static ompt_get_place_proc_ids_t ompt_get_place_proc_ids; +static ompt_get_place_num_t ompt_get_place_num; +static ompt_get_partition_place_nums_t ompt_get_partition_place_nums; +static ompt_get_proc_id_t ompt_get_proc_id; +static ompt_enumerate_states_t ompt_enumerate_states; +static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls; static void print_ids(int level) { - ompt_frame_t* frame = ompt_get_task_frame(level); - printf("%" PRIu64 ": level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_id(), level, ompt_get_parallel_id(level), ompt_get_task_id(level), frame->exit_runtime_frame, frame->reenter_runtime_frame); + ompt_frame_t* frame ; + ompt_data_t* parallel_data; + ompt_data_t* task_data; +// int exists_parallel = ompt_get_parallel_info(level, &parallel_data, NULL); + int exists_task = ompt_get_task_info(level, NULL, &task_data, &frame, &parallel_data, NULL); + if (frame) + { + printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ?
task_data->value : 0, frame->exit_runtime_frame, frame->reenter_runtime_frame); +// printf("%" PRIu64 ": parallel level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_data()->value, level, exists_parallel ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame->exit_runtime_frame, frame->reenter_runtime_frame); + } + else + printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame); + //if (__kmp_threads[__kmp_gtid].th.ompt_thread_info.kmp_return_address != NULL) + // printf( "1: return address not reset\n"); + } +/* #define print_frame(level)\ do {\ - printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_id(), level, __builtin_frame_address(level));\ + unw_cursor_t cursor;\ + unw_context_t uc;\ + unw_word_t fp;\ + unw_getcontext(&uc);\ + unw_init_local(&cursor, &uc);\ + int tmp_level = level;\ + unw_get_reg(&cursor, UNW_REG_SP, &fp);\ + printf("callback %p\n", (void*)fp);\ + while (tmp_level > 0 && unw_step(&cursor) > 0)\ + {\ + unw_get_reg(&cursor, UNW_REG_SP, &fp);\ + printf("callback %p\n", (void*)fp);\ + tmp_level--;\ + }\ + if(tmp_level == 0)\ + printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_data()->value, level, (void*)fp);\ + else\ + printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_data()->value, level, NULL);\ } while(0) +*/ + +#define print_frame(level)\ +do {\ + printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_data()->value, level, __builtin_frame_address(level));\ +} while(0) + +#define print_current_address(id)\ +{} /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ \ +__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \ +ompt_label_##id:\ + printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, (char*)(&& ompt_label_##id)-1, (char*)(&& ompt_label_##id)-4) + /* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */ + /* for void-type runtime function, the label is after the nop (-1), for functions with return value, there is a mov instruction before the label (-4) */ + +#define print_fuzzy_address(id)\ +{} /* Empty block between "#pragma omp ..." 
and __asm__ statement as a workaround for icc bug */ \ +__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \ +ompt_label_##id:\ + printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_##id))/256-1, ((uint64_t)(char*)(&& ompt_label_##id))/256) + /* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */ + /* for void-type runtime function, the label is after the nop (-1), for functions with return value, there is a mov instruction before the label (-4) */ + +/* +static void print_current_address() +{ + int real_level = 2; + void *array[real_level]; + size_t size; + void *address; + + size = backtrace (array, real_level); + if(size == real_level) + address = ((char*)array[real_level-1])-5; + else + address = NULL; + printf("%" PRIu64 ": current_address=%p\n", ompt_get_thread_data()->value, address); +} +*/ + +static void format_task_type(int type, char* buffer) +{ + char* progress = buffer; + if(type & ompt_task_initial) progress += sprintf(progress, "ompt_task_initial"); + if(type & ompt_task_implicit) progress += sprintf(progress, "ompt_task_implicit"); + if(type & ompt_task_explicit) progress += sprintf(progress, "ompt_task_explicit"); + if(type & ompt_task_target) progress += sprintf(progress, "ompt_task_target"); + if(type & ompt_task_undeferred) progress += sprintf(progress, "|ompt_task_undeferred"); + if(type & ompt_task_untied) progress += sprintf(progress, "|ompt_task_untied"); + if(type & ompt_task_final) progress += sprintf(progress, "|ompt_task_final"); + if(type & ompt_task_mergeable) progress += sprintf(progress, "|ompt_task_mergeable"); + if(type & ompt_task_merged) progress += sprintf(progress, "|ompt_task_merged"); +} + +static void +on_ompt_callback_mutex_acquire( + ompt_mutex_kind_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_mutex_acquired( + ompt_mutex_kind_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": 
ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_acquired_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_acquired_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_mutex_released( + ompt_mutex_kind_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_release_nest_lock_last: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_release_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_nest_lock( + ompt_scope_endpoint_t endpoint, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + } +} + +static void +on_ompt_callback_sync_region( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + print_ids(0); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, 
(parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +static void +on_ompt_callback_sync_region_wait( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +static void +on_ompt_callback_flush( + ompt_data_t *thread_data, + const void *codeptr_ra) +{ + printf("%" PRIu64 ": ompt_event_flush: codeptr_ra=%p\n", thread_data->value, codeptr_ra); +} + +static void +on_ompt_callback_cancel( + ompt_data_t *task_data, + int flags, + const void *codeptr_ra) +{ + const char* first_flag_value; + const char* second_flag_value; + if(flags & ompt_cancel_parallel) + first_flag_value = ompt_cancel_flag_t_values[0]; + else if(flags & ompt_cancel_sections) + first_flag_value = ompt_cancel_flag_t_values[1]; + else if(flags & ompt_cancel_do) + first_flag_value = ompt_cancel_flag_t_values[2]; + else if(flags & ompt_cancel_taskgroup) + first_flag_value = ompt_cancel_flag_t_values[3]; + if(flags & ompt_cancel_activated) + second_flag_value = ompt_cancel_flag_t_values[4]; + else if(flags & ompt_cancel_detected) + second_flag_value = ompt_cancel_flag_t_values[5]; + else if(flags & ompt_cancel_discarded_task) + second_flag_value = ompt_cancel_flag_t_values[6]; + + printf("%" PRIu64 ": ompt_event_cancel: task_data=%" PRIu64 ", 
flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, task_data->value, first_flag_value, second_flag_value, flags, codeptr_ra); +} static void -on_ompt_event_barrier_begin( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_idle( + ompt_scope_endpoint_t endpoint) { - printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); - print_ids(0); + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_idle_begin:\n", ompt_get_thread_data()->value); + //printf("%" PRIu64 ": ompt_event_idle_begin: thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, thread_data.value); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_idle_end:\n", ompt_get_thread_data()->value); + //printf("%" PRIu64 ": ompt_event_idle_end: thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, thread_data.value); + break; + } } static void -on_ompt_event_barrier_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_implicit_task( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int team_size, + unsigned int thread_num) { - printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(endpoint) + { + case ompt_scope_begin: + if(task_data->ptr) + printf("%s\n", "0: task_data initially not null"); + task_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); + break; + } } static void -on_ompt_event_implicit_task_begin( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_lock_init( + ompt_mutex_kind_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + default: + break; + } } static void -on_ompt_event_implicit_task_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_lock_destroy( + ompt_mutex_kind_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, 
codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_destroy_nest_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } } static void -on_ompt_event_loop_begin( - ompt_parallel_id_t parallel_id, - ompt_task_id_t parent_task_id, - void *workshare_function) +on_ompt_callback_work( + ompt_work_type_t wstype, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + uint64_t count, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", workshare_function=%p\n", ompt_get_thread_id(), parallel_id, parent_task_id, workshare_function); + switch(endpoint) + { + case ompt_scope_begin: + switch(wstype) + { + case ompt_work_loop: + printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_sections: + printf("%" PRIu64 ": ompt_event_sections_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_executor: + printf("%" PRIu64 ": ompt_event_single_in_block_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_other: + printf("%" PRIu64 ": ompt_event_single_others_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_workshare: + //impl + break; + case ompt_work_distribute: + printf("%" PRIu64 ": ompt_event_distribute_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_taskloop: + //impl + printf("%" PRIu64 ": ompt_event_taskloop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + } + break; + case ompt_scope_end: + switch(wstype) + { + case ompt_work_loop: + printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_sections: + printf("%" PRIu64 ": ompt_event_sections_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_executor: + printf("%" PRIu64 ": ompt_event_single_in_block_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_other: + printf("%" PRIu64 ": ompt_event_single_others_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, 
task_data->value, codeptr_ra, count); + break; + case ompt_work_workshare: + //impl + break; + case ompt_work_distribute: + printf("%" PRIu64 ": ompt_event_distribute_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_taskloop: + //impl + printf("%" PRIu64 ": ompt_event_taskloop_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + } + break; + } } static void -on_ompt_event_loop_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_master( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_master_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_master_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } } static void -on_ompt_event_parallel_begin( - ompt_task_id_t parent_task_id, - ompt_frame_t *parent_task_frame, - ompt_parallel_id_t parallel_id, +on_ompt_callback_parallel_begin( + ompt_data_t *parent_task_data, + const ompt_frame_t *parent_task_frame, + ompt_data_t* parallel_data, uint32_t requested_team_size, - void *parallel_function, - ompt_invoker_t invoker) + ompt_invoker_t invoker, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", parallel_function=%p, invoker=%d\n", ompt_get_thread_id(), parent_task_id, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_id, requested_team_size, parallel_function, invoker); + if(parallel_data->ptr) + printf("%s\n", "0: parallel_data initially not null"); + parallel_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, parent_task_data->value, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker); } static void -on_ompt_event_parallel_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id, - ompt_invoker_t invoker) +on_ompt_callback_parallel_end( + ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_invoker_t invoker, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d\n", ompt_get_thread_id(), parallel_id, task_id, invoker); + printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, invoker, 
codeptr_ra); } +static void +on_ompt_callback_task_create( + ompt_data_t *parent_task_data, /* id of parent task */ + const ompt_frame_t *parent_frame, /* frame data for parent task */ + ompt_data_t* new_task_data, /* id of created task */ + int type, + int has_dependences, + const void *codeptr_ra) /* pointer to outlined function */ +{ + if(new_task_data->ptr) + printf("%s\n", "0: new_task_data initially not null"); + new_task_data->value = ompt_get_unique_id(); + char buffer[2048]; + + format_task_type(type, buffer); -void ompt_initialize( + //there is no parallel_begin callback for the implicit parallel region, + //thus parallel_data is initialized in the initial task + if(type & ompt_task_initial) + { + ompt_data_t *parallel_data; + ompt_get_parallel_info(0, &parallel_data, NULL); + if(parallel_data->ptr) + printf("%s\n", "0: parallel_data initially not null"); + parallel_data->value = ompt_get_unique_id(); + } + + printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, parent_task_data ? parent_task_data->value : 0, parent_frame ? parent_frame->exit_runtime_frame : NULL, parent_frame ? parent_frame->reenter_runtime_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no"); +} + +static void +on_ompt_callback_task_schedule( + ompt_data_t *first_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *second_task_data) +{ + printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status); + if(prior_task_status == ompt_task_complete) + { + printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value); + } +} + +static void +on_ompt_callback_task_dependences( + ompt_data_t *task_data, + const ompt_task_dependence_t *deps, + int ndeps) +{ + printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n", ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps); +} + +static void +on_ompt_callback_task_dependence( + ompt_data_t *first_task_data, + ompt_data_t *second_task_data) +{ + printf("%" PRIu64 ": ompt_event_task_dependence_pair: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value); +} + +static void +on_ompt_callback_thread_begin( + ompt_thread_type_t thread_type, + ompt_data_t *thread_data) +{ + if(thread_data->ptr) + printf("%s\n", "0: thread_data initially not null"); + thread_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value); +} + +static void +on_ompt_callback_thread_end( + ompt_data_t *thread_data) +{ + printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, thread_data->value); + //printf("%" PRIu64 ": ompt_event_thread_end: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value); +} + +static int +on_ompt_callback_control_tool( + uint64_t command, + uint64_t modifier, + 
void *arg, + const void *codeptr_ra) +{ + ompt_frame_t* omptTaskFrame; + ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL); + printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_runtime_frame, omptTaskFrame->reenter_runtime_frame); + return 0; //success +} + +#define register_callback_t(name, type) \ +do{ \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \ + ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ +}while(0) + +#define register_callback(name) register_callback_t(name, name##_t) + +int ompt_initialize( ompt_function_lookup_t lookup, - const char *runtime_version, - unsigned int ompt_version) + ompt_fns_t* fns) { - ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); - ompt_get_task_id = (ompt_get_task_id_t) lookup("ompt_get_task_id"); - ompt_get_task_frame = (ompt_get_task_frame_t) lookup("ompt_get_task_frame"); - ompt_get_thread_id = (ompt_get_thread_id_t) lookup("ompt_get_thread_id"); - ompt_get_parallel_id = (ompt_get_parallel_id_t) lookup("ompt_get_parallel_id"); + ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); + ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info"); + ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data"); + ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info"); + ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); + + ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places"); + ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids"); + ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num"); + ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums"); + ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id"); + ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states"); + ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls"); - ompt_set_callback(ompt_event_barrier_begin, (ompt_callback_t) &on_ompt_event_barrier_begin); - ompt_set_callback(ompt_event_barrier_end, (ompt_callback_t) &on_ompt_event_barrier_end); - ompt_set_callback(ompt_event_implicit_task_begin, (ompt_callback_t) &on_ompt_event_implicit_task_begin); - ompt_set_callback(ompt_event_implicit_task_end, (ompt_callback_t) &on_ompt_event_implicit_task_end); - ompt_set_callback(ompt_event_loop_begin, (ompt_callback_t) &on_ompt_event_loop_begin); - ompt_set_callback(ompt_event_loop_end, (ompt_callback_t) &on_ompt_event_loop_end); - ompt_set_callback(ompt_event_parallel_begin, (ompt_callback_t) &on_ompt_event_parallel_begin); - ompt_set_callback(ompt_event_parallel_end, (ompt_callback_t) &on_ompt_event_parallel_end); - printf("0: NULL_POINTER=%p\n", NULL); + register_callback(ompt_callback_mutex_acquire); + register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t); + register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t); + register_callback(ompt_callback_nest_lock); + register_callback(ompt_callback_sync_region); + register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t); + 
register_callback(ompt_callback_control_tool); + register_callback(ompt_callback_flush); + register_callback(ompt_callback_cancel); + register_callback(ompt_callback_idle); + register_callback(ompt_callback_implicit_task); + register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t); + register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t); + register_callback(ompt_callback_work); + register_callback(ompt_callback_master); + register_callback(ompt_callback_parallel_begin); + register_callback(ompt_callback_parallel_end); + register_callback(ompt_callback_task_create); + register_callback(ompt_callback_task_schedule); + register_callback(ompt_callback_task_dependences); + register_callback(ompt_callback_task_dependence); + register_callback(ompt_callback_thread_begin); + register_callback(ompt_callback_thread_end); + printf("0: NULL_POINTER=%p\n", (void*)NULL); + return 1; //success +} + +void ompt_finalize(ompt_fns_t* fns) +{ + printf("0: ompt_event_runtime_shutdown\n"); } -ompt_initialize_t ompt_tool() +ompt_fns_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) { - return &ompt_initialize; + static ompt_fns_t ompt_fns = {&ompt_initialize,&ompt_finalize}; + return &ompt_fns; } Index: runtime/test/ompt/cancel/cancel_parallel.c =================================================================== --- /dev/null +++ runtime/test/ompt/cancel/cancel_parallel.c @@ -0,0 +1,46 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, cancel + +#include "callback.h" +#include "omp.h" + +int main() +{ + #pragma omp parallel num_threads(2) + { + if(omp_get_thread_num() == 0) + { + printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_1))/256-1, ((uint64_t)(char*)(&& ompt_label_1))/256); + #pragma omp cancel parallel + {} /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ + __asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ +ompt_label_1: + __asm__("nop"); + } + else + { + usleep(100); + printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_2))/256-1, ((uint64_t)(char*)(&& ompt_label_2))/256); + #pragma omp cancellation point parallel + {} /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ + __asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ +ompt_label_2: + __asm__("nop"); + } + } + + + // Check if libomp supports the callbacks for this test. 
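The fuzzy_address lines in the cancel tests above rely on the GNU &&label extension: the test cannot predict the exact return address the runtime will report as codeptr_ra, so it takes the address of a label placed just after the construct and divides by 256, accepting either of two adjacent 256-byte buckets. A minimal standalone sketch of that idea, outside the OMPT harness (the file and the label name fuzzy_label are illustrative only, and a GNU-C-compatible compiler is assumed):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
  /* codeptr_ra points a few bytes past the construct; dividing by 256 maps
     nearby addresses into the same (or the adjacent) bucket, so a test can
     print both candidates and match either one. */
  printf("fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 "\n",
         ((uint64_t)(char *)(&&fuzzy_label)) / 256 - 1,
         ((uint64_t)(char *)(&&fuzzy_label)) / 256);
  __asm__("nop"); /* guarantee an instruction right before the label */
fuzzy_label:
  __asm__("nop");
  return 0;
}

FileCheck then compares this prefix against the codeptr_ra printed by the cancel callback, with the last two hex digits left unconstrained.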
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_activated=17, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: fuzzy_address={{.*}}[[OTHER_RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_detected=33, codeptr_ra=[[OTHER_RETURN_ADDRESS]]{{[0-f][0-f]}} + + return 0; +} Index: runtime/test/ompt/cancel/cancel_taskgroup.c =================================================================== --- /dev/null +++ runtime/test/ompt/cancel/cancel_taskgroup.c @@ -0,0 +1,86 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, cancel, taskgroup + +#include "callback.h" +#include +#include + +int main() +{ + int condition=0; + #pragma omp parallel num_threads(2) + {} + + print_frame(0); + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp taskgroup + { + #pragma omp task shared(condition) + { + printf("start execute task 1\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 1\n"); + } + #pragma omp task shared(condition) + { + printf("start execute task 2\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 2\n"); + } + #pragma omp task shared(condition) + { + printf("start execute task 3\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 3\n"); + } + #pragma omp task if(0) shared(condition) + { + printf("start execute task 4\n"); + OMPT_WAIT(condition,1); + #pragma omp cancel taskgroup + printf("end execute task 4\n"); + } + OMPT_SIGNAL(condition); + } + } + #pragma omp barrier + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3 + + // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]] + // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_detected=40, codeptr_ra={{0x[0-f]*}} + + return 0; +} Index: runtime/test/ompt/cancel/cancel_worksharing.c =================================================================== --- /dev/null +++ runtime/test/ompt/cancel/cancel_worksharing.c @@ -0,0 +1,65 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, cancel + +#include "callback.h" +#include + +int main() +{ + int condition=0; + #pragma omp parallel num_threads(2) + { + int x = 0; + int i; + #pragma omp for + for(i = 0; i < 2; i++) + { + if(i == 0) + { + x++; + OMPT_SIGNAL(condition); + #pragma omp cancel for + } + else + { + x++; + OMPT_WAIT(condition,1); + usleep(10000); + #pragma omp cancellation point for + } + } + 
} + #pragma omp parallel num_threads(2) + { + #pragma omp sections + { + #pragma omp section + { + OMPT_SIGNAL(condition); + #pragma omp cancel sections + } + #pragma omp section + { + OMPT_WAIT(condition,2); + usleep(10000); + #pragma omp cancellation point sections + } + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + + // cancel for and sections + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_activated=20, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_activated=18, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_detected=36, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_detected=34, codeptr_ra={{0x[0-f]*}} + + return 0; +} Index: runtime/test/ompt/loadtool/tool.c =================================================================== --- /dev/null +++ runtime/test/ompt/loadtool/tool.c @@ -0,0 +1,27 @@ +// RUN: true +#include +#include +#include +#include +#include + +int ompt_initialize( + ompt_function_lookup_t lookup, + ompt_fns_t* fns) +{ + printf("0: NULL_POINTER=%p\n", (void*)NULL); + return 1; //success +} + +void ompt_finalize(ompt_fns_t* fns) +{ + printf("%d: ompt_event_runtime_shutdown\n", omp_get_thread_num()); +} + +ompt_fns_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + static ompt_fns_t ompt_fns = {&ompt_initialize,&ompt_finalize}; + return &ompt_fns; +} Index: runtime/test/ompt/loadtool/tool_available.c =================================================================== --- /dev/null +++ runtime/test/ompt/loadtool/tool_available.c @@ -0,0 +1,20 @@ +// RUN: %libomp-tool %S/tool.c && %libomp-compile && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s +// REQUIRES: ompt + +#include "omp.h" + +int main() +{ + #pragma omp parallel num_threads(2) + { + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}0: ompt_event_runtime_shutdown + + return 0; +} Index: runtime/test/ompt/misc/api_calls.c =================================================================== --- /dev/null +++ runtime/test/ompt/misc/api_calls.c @@ -0,0 +1,66 @@ +// RUN: %libomp-compile && env OMP_PLACES=cores %libomp-run | FileCheck %s +// REQUIRES: ompt, linux +#include "callback.h" +#include +#define __USE_GNU +#include +#undef __USE_GNU + +void print_list(char* function_name, int list[]) +{ + printf("%" PRIu64 ": %s(0)=(%d", ompt_get_thread_data()->value, function_name, list[0]); + int i; + for(i = 1; i < omp_get_place_num_procs(0); i++) + { + printf(",%d", list[i]); + } + printf(")\n"); +} + +int main() +{ + #pragma omp parallel num_threads(1) + { + printf("%" PRIu64 ": omp_get_num_places()=%d\n", ompt_get_thread_data()->value, omp_get_num_places()); + printf("%" PRIu64 ": ompt_get_num_places()=%d\n", ompt_get_thread_data()->value, ompt_get_num_places()); + + int omp_ids[omp_get_place_num_procs(0)]; + omp_get_place_proc_ids(0, omp_ids); + print_list("omp_get_place_proc_ids" ,omp_ids); + int ompt_ids[omp_get_place_num_procs(0)]; + ompt_get_place_proc_ids(0, omp_get_place_num_procs(0), ompt_ids); + print_list("ompt_get_place_proc_ids", ompt_ids); + + printf("%" PRIu64 ": omp_get_place_num()=%d\n", ompt_get_thread_data()->value, omp_get_place_num()); + printf("%" PRIu64 ": ompt_get_place_num()=%d\n", ompt_get_thread_data()->value, ompt_get_place_num()); + + int omp_nums[omp_get_partition_num_places()]; + omp_get_partition_place_nums(omp_nums); + print_list("omp_get_partition_place_nums" ,omp_nums); + int ompt_nums[omp_get_partition_num_places()]; + ompt_get_partition_place_nums(omp_get_partition_num_places(), ompt_nums); + print_list("ompt_get_partition_place_nums", ompt_nums); + + printf("%" PRIu64 ": sched_getcpu()=%d\n", ompt_get_thread_data()->value, sched_getcpu()); + printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", ompt_get_thread_data()->value, ompt_get_proc_id()); + } + + // Check if libomp supports the callbacks for this test. + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: omp_get_num_places()=[[NUM_PLACES:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_places()=[[NUM_PLACES]] + + // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_proc_ids(0)=([[PROC_IDS:[0-9\,]+]]) + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids(0)=([[PROC_IDS]]) + + // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_num()=[[PLACE_NUM:[-]?[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=[[PLACE_NUM]] + + // CHECK: {{^}}[[MASTER_ID]]: sched_getcpu()=[[CPU_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=[[CPU_ID]] + + + return 0; +} Index: runtime/test/ompt/misc/control_tool.c =================================================================== --- /dev/null +++ runtime/test/ompt/misc/control_tool.c @@ -0,0 +1,27 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + #pragma omp parallel num_threads(1) + { + print_frame(1); + print_frame(0); + omp_control_tool(omp_control_tool_flush, 1, NULL); + print_current_address(0); + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_control_tool' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(1)=[[EXIT_FRAME:0x[0-f]*]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/misc/idle.c =================================================================== --- /dev/null +++ runtime/test/ompt/misc/idle.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(3) + { + #pragma omp atomic + x++; + } + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + } + + + printf("x=%d\n", x); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_idle_begin: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_idle_end: + + return 0; +} Index: runtime/test/ompt/ompt-signal.h =================================================================== --- /dev/null +++ runtime/test/ompt/ompt-signal.h @@ -0,0 +1,24 @@ +// These functions are used to provide a signal-wait mechanism to enforce expected scheduling for the test cases. +// Conditional variable (s) needs to be shared! Initialize to 0 +#include + +#define OMPT_SIGNAL(s) ompt_signal(&s) +//inline +void ompt_signal(int* s) +{ + #pragma omp atomic + (*s)++; +} + +#define OMPT_WAIT(s,v) ompt_wait(&s,v) +// wait for s >= v +//inline +void ompt_wait(int *s, int v) +{ + int wait=0; + do{ + usleep(10); + #pragma omp atomic read + wait = (*s); + }while(wait + +int main() +{ + omp_set_nested(1); + omp_set_max_active_levels(1); + + #pragma omp parallel num_threads(2) + { + print_ids(0); + print_ids(1); + #pragma omp parallel num_threads(2) + { + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. 
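ompt-signal.h above is the only synchronization the tests use to force a particular schedule; the loop condition is truncated here and presumably reads while(wait < v);. Under that assumption, the signal/wait handshake reduces to the following self-contained sketch (the names condition and seen are illustrative, not taken from the patch):

#include <stdio.h>
#include <unistd.h>
#include <omp.h>

int main(void)
{
  int condition = 0;               /* shared counter, initialized to 0 */
  #pragma omp parallel num_threads(2) shared(condition)
  {
    if (omp_get_thread_num() == 0)
    {
      #pragma omp atomic           /* thread 0 signals by incrementing */
      condition++;
    }
    else
    {
      int seen = 0;
      do {                         /* thread 1 spins until the signal arrives */
        usleep(10);
        #pragma omp atomic read
        seen = condition;
      } while (seen < 1);
      printf("signal received\n");
    }
  }
  return 0;
}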
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, 
codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} Index: runtime/test/ompt/parallel/nested.c =================================================================== --- runtime/test/ompt/parallel/nested.c +++ runtime/test/ompt/parallel/nested.c @@ -3,9 +3,11 @@ // REQUIRES: ompt #include "callback.h" #include +#include int main() { + int condition=0; omp_set_nested(1); print_frame(0); @@ -15,6 +17,10 @@ print_ids(0); print_ids(1); print_frame(0); + + //get all implicit task events before starting nested: + #pragma omp barrier + #pragma omp parallel num_threads(4) { print_frame(1); @@ -22,17 +28,38 @@ print_ids(1); print_ids(2); print_frame(0); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,16); #pragma omp barrier + print_fuzzy_address(1); print_ids(0); } + print_fuzzy_address(2); print_ids(0); } + print_fuzzy_address(3); + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
@@ -46,219 +73,224 @@ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]] // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[NESTED_EXIT:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] + // 
THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // explicit barrier - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] // implicit barrier - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], 
exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // implicit barrier - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: 
{{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], 
task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // nested parallel worker threads // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/parallel/nested_lwt.c =================================================================== --- runtime/test/ompt/parallel/nested_lwt.c +++ runtime/test/ompt/parallel/nested_lwt.c @@ -3,35 +3,59 @@ // REQUIRES: ompt #include "callback.h" #include +#include int main() { omp_set_nested(1); + int condition; #pragma omp parallel num_threads(4) { print_ids(0); print_ids(1); + //get all implicit task events before starting nested: + #pragma omp barrier #pragma omp parallel num_threads(1) { print_ids(0); print_ids(1); print_ids(2); + //get all implicit task events before starting nested: + #pragma omp barrier #pragma omp parallel num_threads(4) { print_ids(0); print_ids(1); print_ids(2); print_ids(3); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,16); } + print_fuzzy_address(1); } + print_fuzzy_address(2); } + print_fuzzy_address(3); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
@@ -48,251 +72,261 @@ // THREADS: 0: NULL_POINTER=[[NULL:.*$]] - // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: 
ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end - // 
THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: 
{{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], 
task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, 
invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], 
reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // nested parallel worker threads // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
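Note on the "task level N" lines being rewritten above: they are produced by the print_ids() helper from callback.h. A minimal sketch of how such a helper can be written against the OMPT inquiry interface follows; it assumes the entry points were obtained through the lookup function at tool initialization and that ompt_frame_t holds two plain pointers named like the printed labels. The real helper is in callback.h, and the TR-era header used by this patch may spell these names and types differently, so treat this only as an illustration.

// Sketch of a print_ids()-style helper (assumptions noted above).
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <omp-tools.h> // named ompt.h at the time of this patch

static ompt_get_task_info_t ompt_get_task_info;     // set via lookup() at init
static ompt_get_thread_data_t ompt_get_thread_data; // set via lookup() at init

static void print_task_level(int level) {
  int flags, thread_num;
  ompt_data_t *task_data, *parallel_data;
  ompt_frame_t *frame;
  // ompt_get_task_info returns 0 once 'level' is deeper than the task
  // ancestry, so probing levels 0..3 as these tests do is always safe.
  if (!ompt_get_task_info(level, &flags, &task_data, &frame, &parallel_data,
                          &thread_num))
    return;
  printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
         ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n",
         ompt_get_thread_data()->value, level,
         parallel_data ? parallel_data->value : (uint64_t)0, task_data->value,
         // Field names mirror the printed labels; the actual header may use
         // e.g. exit_runtime_frame/reenter_runtime_frame instead.
         frame->exit_frame, frame->reenter_frame);
}

Walking the ancestry this way is also consistent with the checks above switching from a hard-coded parallel_id=0 to the captured [[IMPLICIT_PARALLEL_ID]] at the outermost level: the implicit (initial) parallel region is now reported with an id of its own.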
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; Index: runtime/test/ompt/parallel/nested_serialized.c =================================================================== --- runtime/test/ompt/parallel/nested_serialized.c +++ runtime/test/ompt/parallel/nested_serialized.c @@ -18,13 +18,29 @@ print_ids(1); print_ids(2); } + print_fuzzy_address(1); } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end 
before parallel_end! @@ -41,67 +57,71 @@ // THREADS: 0: NULL_POINTER=[[NULL:.*$]] - // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: 
parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: 
ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, 
parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/parallel/no_thread_num_clause.c =================================================================== --- /dev/null +++ runtime/test/ompt/parallel/no_thread_num_clause.c @@ -0,0 +1,95 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +#include "callback.h" + +int main() +{ + omp_set_num_threads(4); + #pragma omp parallel + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=(nil), parent_task_frame.reenter=(nil), new_task_id=281474976710658, codeptr_ra=(nil), task_type=ompt_task_initial=1, has_dependences=no + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}} + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: 
parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} Index: runtime/test/ompt/parallel/normal.c =================================================================== --- runtime/test/ompt/parallel/normal.c +++ runtime/test/ompt/parallel/normal.c @@ 
-10,12 +10,27 @@ print_ids(0); print_ids(1); } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
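The updated checks capture codeptr_ra only down to the last two hex digits ([[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}) and later match a fuzzy_address line against that prefix, because the return address reported by the runtime lies a few bytes past the construct in the caller. The sketch below shows the idea behind a print_fuzzy_address-style helper; the 256-byte window and the exact output format are assumptions here, and callback.h defines the real helper.

// Illustration of fuzzy return-address matching (assumed scheme): the test
// only requires that codeptr_ra and an address taken right after the
// construct fall into the same 256-byte window, so the low byte is dropped.
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

static void print_fuzzy_address_sketch(unsigned tag, const void *addr_after) {
  uintptr_t window = (uintptr_t)addr_after & ~(uintptr_t)0xff; // drop low byte
  // The printed prefix then matches the [[RETURN_ADDRESS]] capture above.
  printf("%u: fuzzy_address=0x%" PRIxPTR "\n", tag, window);
}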
@@ -28,43 +43,48 @@ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: 0: NULL_POINTER=[[NULL:.*$]] - // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}} + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{.*}} // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: 
task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/parallel/not_enough_threads.c =================================================================== --- /dev/null +++ runtime/test/ompt/parallel/not_enough_threads.c @@ -0,0 +1,76 @@ +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +#include "callback.h" + +int main() +{ + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
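Each of the new tests opens with CHECK-NOT lines for "0: Could not register callback '...'". Those messages come from the registration step of the test tool in callback.h when ompt_set_callback() refuses an event. A minimal sketch of that step is shown below, written against the OpenMP 5.0 spelling of ompt_start_tool and the related types; the TR-era header shipped with this patch may use slightly different signatures, so this is an illustration rather than the actual callback.h code.

// Minimal tool skeleton showing where the "Could not register callback"
// diagnostics guarded by the CHECK-NOT lines would come from.
#include <stdio.h>
#include <omp-tools.h> // named ompt.h at the time of this patch

static void on_thread_begin(ompt_thread_t thread_type,
                            ompt_data_t *thread_data) {
  // The real callbacks in callback.h print the event lines matched above.
}

static int tool_initialize(ompt_function_lookup_t lookup,
                           int initial_device_num, ompt_data_t *tool_data) {
  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  if (ompt_set_callback(ompt_callback_thread_begin,
                        (ompt_callback_t)&on_thread_begin) == ompt_set_never)
    printf("0: Could not register callback 'ompt_callback_thread_begin'\n");
  return 1; // non-zero keeps the tool attached
}

static void tool_finalize(ompt_data_t *tool_data) {}

// The runtime looks this symbol up at startup; returning NULL would leave
// the tool detached and no event lines would be printed at all.
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {&tool_initialize, &tool_finalize,
                                            {0}};
  return &result;
}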
+ + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}} + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} Index: runtime/test/ompt/parallel/parallel_if0.c =================================================================== --- /dev/null +++ runtime/test/ompt/parallel/parallel_if0.c @@ -0,0 +1,75 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include 
"callback.h" + +int main() +{ +// print_frame(0); + #pragma omp parallel if(0) + { +// print_frame(1); + print_ids(0); + print_ids(1); +// print_frame(0); + #pragma omp parallel if(0) + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); +// print_frame(0); + #pragma omp task + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_ids(3); + } + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // 
CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]] + + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/parallel/serialized.c =================================================================== --- runtime/test/ompt/parallel/serialized.c +++ runtime/test/ompt/parallel/serialized.c @@ -4,21 +4,73 @@ int main() { +// print_frame(0); #pragma omp parallel num_threads(1) { +// print_frame(1); print_ids(0); print_ids(1); +// print_frame(0); + #pragma omp parallel num_threads(1) + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); +// print_frame(0); + #pragma omp task + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_ids(3); + } + } + print_fuzzy_address(1); } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end' // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[OUTER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[INNER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], 
task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]] + + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[INNER_RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[INNER_RETURN_ADDRESS]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[OUTER_RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[OUTER_RETURN_ADDRESS]] return 0; } Index: runtime/test/ompt/synchronization/barrier/explicit.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/explicit.c @@ -0,0 +1,57 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + + #pragma omp barrier + print_current_address(); + + #pragma omp atomic + x++; + } + + + // Check if libomp supports the callbacks for this test. 
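The barrier tests below drive ompt_callback_sync_region and ompt_callback_sync_region_wait. A sketch of a matching handler, again assuming the OpenMP 5.0 <omp-tools.h> signatures; the tests expect codeptr_ra=[[NULL]] for worker threads at the join barrier, presumably because that barrier is not lexically present in the workers' user code, so there is no return address to report.

#include <omp-tools.h>
#include <stdio.h>

static void on_sync_region(ompt_sync_region_t kind,
                           ompt_scope_endpoint_t endpoint,
                           ompt_data_t *parallel_data, ompt_data_t *task_data,
                           const void *codeptr_ra) {
  // codeptr_ra identifies the barrier in the encountering thread's code,
  // or is NULL when the thread reaches a barrier it did not encounter itself.
  printf("sync_region %s: codeptr_ra=%p\n",
         endpoint == ompt_scope_begin ? "begin" : "end", codeptr_ra);
}

// In the tool initializer, the same handler can serve both events:
//   set_callback(ompt_callback_sync_region,      (ompt_callback_t)on_sync_region);
//   set_callback(ompt_callback_sync_region_wait, (ompt_callback_t)on_sync_region);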
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread explicit barrier + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + + + // worker thread explicit barrier + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/for_loop.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/for_loop.c @@ -0,0 +1,55 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int y[] = {0,1,2,3}; + + #pragma omp parallel num_threads(2) + { + //implicit barrier at end of for loop + int i; + #pragma omp for + for (i = 0; i < 4; i++) + { + y[i]++; + } + print_current_address(); + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at loop end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread explicit barrier + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // worker thread implicit barrier after parallel + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/for_simd.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/for_simd.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include + +int main() +{ + int y[] = {0,1,2,3}; + + int i; + #pragma omp for simd + for (i = 0; i < 4; i++) + { + y[i]++; + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at simd loop end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/parallel_region.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/parallel_region.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + //implicit barrier at end of a parallel region + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + } + print_fuzzy_address(); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/sections.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/sections.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + //implicit barrier after sections with nowait but with lastprivates + //implicit barrier at end of sections + #pragma omp sections + { + #pragma omp section + { + #pragma omp atomic + x++; + } + + #pragma omp section + { + #pragma omp atomic + 
x++; + } + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at sections end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread implicit barrier at sections end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/single.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/single.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + //implicit barrier at end of single + #pragma omp single + { + x++; + } + print_fuzzy_address(); + //critical section to avoid merge of two barriers into one + #pragma omp critical + { + x++; + } + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at single end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread implicit barrier at single end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/critical.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/critical.c @@ -0,0 +1,31 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + #pragma omp critical + { + print_current_address(1); + print_ids(0); + } + print_current_address(2); + + + // Check if libomp supports the callbacks for this test. 
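critical, ordered and the lock routines exercised by the following tests all funnel into the same three mutex callbacks. A sketch of handlers with the OpenMP 5.0 <omp-tools.h> signatures (assumed here):

#include <omp-tools.h>
#include <inttypes.h>
#include <stdio.h>

static void on_mutex_acquire(ompt_mutex_t kind, unsigned int hint,
                             unsigned int impl, ompt_wait_id_t wait_id,
                             const void *codeptr_ra) {
  printf("wait: wait_id=%" PRIu64 ", hint=%u, impl=%u, codeptr_ra=%p\n",
         (uint64_t)wait_id, hint, impl, codeptr_ra);
}

static void on_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                              const void *codeptr_ra) {
  printf("acquired: wait_id=%" PRIu64 "\n", (uint64_t)wait_id);
}

static void on_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                              const void *codeptr_ra) {
  printf("released: wait_id=%" PRIu64 "\n", (uint64_t)wait_id);
}

// Registered as ompt_callback_mutex_acquire, ompt_callback_mutex_acquired and
// ompt_callback_mutex_released; locks, nest locks, critical and ordered are
// distinguished by the 'kind' argument.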
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_critical: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/flush.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/flush.c @@ -0,0 +1,29 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, flush +#include "callback.h" +#include + +int main() +{ + #pragma omp parallel num_threads(2) + { + int tid = omp_get_thread_num(); + + #pragma omp flush + print_current_address(1); + } + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_flush' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: current_address=[[RETURN_ADDRESS]] + // + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: current_address=[[RETURN_ADDRESS]] + + + + return 0; +} Index: runtime/test/ompt/synchronization/lock.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/lock.c @@ -0,0 +1,44 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + //need to use an OpenMP construct so that OMPT will be initalized + #pragma omp parallel num_threads(1) + print_ids(0); + + omp_lock_t lock; + printf("%" PRIu64 ": &lock: %lli\n", ompt_get_thread_data()->value, (long long) &lock); + omp_init_lock(&lock); + print_current_address(1); + omp_set_lock(&lock); + print_current_address(2); + omp_unset_lock(&lock); + print_current_address(3); + omp_destroy_lock(&lock); + print_current_address(4); + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: &lock: [[WAIT_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_init_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/master.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/master.c @@ -0,0 +1,33 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt, master_callback +#include "callback.h" +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + print_fuzzy_address(1); + x++; + } + print_current_address(2); + } + + printf("%" PRIu64 ": x=%d\n", ompt_get_thread_data()->value, x); + + // Check if libomp supports the callbacks for this test. 
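A sketch of the master-construct callback checked below, using the OpenMP 5.0 name ompt_callback_master as an assumption (later specifications rename it to ompt_callback_masked). Both the begin and the end of the region arrive on one handler, distinguished by the endpoint argument:

#include <omp-tools.h>

static void on_master(ompt_scope_endpoint_t endpoint,
                      ompt_data_t *parallel_data, ompt_data_t *task_data,
                      const void *codeptr_ra) {
  if (endpoint == ompt_scope_begin) {
    // entering the master region; codeptr_ra marks the construct
  } else {
    // leaving the master region
  }
}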
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_master_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS_END:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: current_address=[[RETURN_ADDRESS_END]] + + + return 0; +} Index: runtime/test/ompt/synchronization/nest_lock.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/nest_lock.c @@ -0,0 +1,52 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + //need to use an OpenMP construct so that OMPT will be initalized + #pragma omp parallel num_threads(1) + print_ids(0); + + omp_nest_lock_t nest_lock; + printf("%" PRIu64 ": &nest_lock: %lli\n", ompt_get_thread_data()->value, (long long) &nest_lock); + omp_init_nest_lock(&nest_lock); + print_current_address(1); + omp_set_nest_lock(&nest_lock); + print_current_address(2); + omp_set_nest_lock(&nest_lock); + print_current_address(3); + omp_unset_nest_lock(&nest_lock); + print_current_address(4); + omp_unset_nest_lock(&nest_lock); + print_current_address(5); + omp_destroy_nest_lock(&nest_lock); + print_current_address(6); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + 
return 0; +} Index: runtime/test/ompt/synchronization/ordered.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/ordered.c @@ -0,0 +1,31 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + #pragma omp ordered + { + print_current_address(1); + print_ids(0); + } + print_current_address(2); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_ordered: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/taskgroup.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/taskgroup.c @@ -0,0 +1,48 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, cancel, taskgroup + +#include "callback.h" +#include +#include + +int main() +{ + int condition=0; + int x=0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp taskgroup + { + print_current_address(1); + #pragma omp task + { + #pragma omp atomic + x++; + } + } + print_current_address(2); + } + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskgroup_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/taskwait.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/taskwait.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, master_callback +#include "callback.h" +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp task + { + x++; + } + #pragma omp taskwait + print_current_address(1); + } + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: ompt_event_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // -CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra= + // -CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra= + // -CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra= + // -CHECK: {{^}}[[THREAD_ID]]: ompt_event_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra= + + return 0; +} Index: runtime/test/ompt/synchronization/test_lock.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/test_lock.c @@ -0,0 +1,54 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt, master_callback + +#include "callback.h" +#include + +int main() +{ + omp_lock_t lock; + omp_init_lock(&lock); + print_current_address(1); + + omp_test_lock(&lock); + print_current_address(2); + omp_unset_lock(&lock); + print_current_address(3); + + omp_set_lock(&lock); + print_current_address(4); + omp_test_lock(&lock); + print_current_address(5); + omp_unset_lock(&lock); + print_current_address(6); + + omp_destroy_lock(&lock); + print_current_address(7); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/test_nest_lock.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/test_nest_lock.c @@ -0,0 +1,42 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt, master_callback + +#include "callback.h" +#include + +int main() +{ + omp_nest_lock_t nest_lock; + omp_init_nest_lock(&nest_lock); + + omp_test_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + + omp_set_nest_lock(&nest_lock); + omp_test_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + + omp_destroy_nest_lock(&nest_lock); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + + return 0; +} Index: runtime/test/ompt/synchronization/test_nest_lock_parallel.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/test_nest_lock_parallel.c @@ -0,0 +1,59 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, master_callback +#include "callback.h" +#include + +int main() +{ + omp_nest_lock_t nest_lock; + omp_init_nest_lock(&nest_lock); + + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + omp_set_nest_lock(&nest_lock); + print_current_address(1); + } + #pragma omp barrier + omp_test_nest_lock(&nest_lock); //should fail for non-master + print_current_address(2); + #pragma omp barrier + #pragma omp master + { + omp_unset_nest_lock(&nest_lock); + print_current_address(3); + omp_unset_nest_lock(&nest_lock); + print_current_address(4); + } + } + + omp_destroy_nest_lock(&nest_lock); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]] + // CHECK-NEXT: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/tasks/dependences.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/dependences.c @@ -0,0 +1,52 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, dependences +#include "callback.h" +#include +#include +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp task depend(out:x) + { + x++; + usleep(100); + } + print_fuzzy_address(1); + + #pragma omp task depend(in:x) + { + x = -1; + } + } + } + + x++; + + + // Check if libomp supports the callbacks for this test. 
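The dependences test below exercises two callbacks: one reporting the full dependence list of a newly created task, and one reporting each enforced producer/consumer pair. This sketch uses the OpenMP 5.0 names ompt_callback_dependences and ompt_callback_task_dependence; the test's own diagnostics call the first one ompt_callback_task_dependences, so the exact spelling and struct layout here are assumptions.

#include <omp-tools.h>
#include <inttypes.h>
#include <stdio.h>

// Reported once per task, right after its creation, if it has dependences.
static void on_dependences(ompt_data_t *task_data,
                           const ompt_dependence_t *deps, int ndeps) {
  for (int i = 0; i < ndeps; i++)
    printf("task %" PRIu64 ": dep %d, type=%d\n",
           task_data->value, i, (int)deps[i].dependence_type);
}

// Reported once per dependence edge the runtime actually enforces.
static void on_task_dependence(ompt_data_t *src_task_data,
                               ompt_data_t *sink_task_data) {
  printf("dependence: %" PRIu64 " -> %" PRIu64 "\n",
         src_task_data->value, sink_task_data->value);
}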
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1 + // CHECK: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1 + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]] + + + return 0; +} Index: runtime/test/ompt/tasks/explicit_task.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/explicit_task.c @@ -0,0 +1,100 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + OMPT_WAIT(condition,1); + print_ids(0); + } + #pragma omp barrier + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
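The task tests below check ompt_callback_task_create and ompt_callback_task_schedule. A sketch of both handlers under the assumed OpenMP 5.0 signatures; the id counter is purely illustrative, the point being that whatever the tool stores in new_task_data is handed back by later callbacks for the same task.

#include <omp-tools.h>
#include <stdint.h>

static uint64_t next_task_id = 1; // tool-side counter, illustrative only

static void on_task_create(ompt_data_t *encountering_task_data,
                           const ompt_frame_t *encountering_task_frame,
                           ompt_data_t *new_task_data, int flags,
                           int has_dependences, const void *codeptr_ra) {
  // Tag the new task so later events can be matched to this creation point.
  new_task_data->value =
      __atomic_fetch_add(&next_task_id, 1, __ATOMIC_RELAXED);
}

static void on_task_schedule(ompt_data_t *prior_task_data,
                             ompt_task_status_t prior_task_status,
                             ompt_data_t *next_task_data) {
  // prior_task_status is ompt_task_complete once the prior task has finished.
}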
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // explicit barrier after master + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], 
task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // this is expected to come earlier and at MASTER: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/tasks/serialized.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/serialized.c @@ -0,0 +1,93 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include +#include + +int main() +{ + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + int t = (int)sin(0.1); + #pragma omp task if(t) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + print_ids(0); + } + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // <- ompt_event_task_schedule ([[IMPLICIT_TASK_ID]], [[TASK_ID]]) would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // <- ompt_event_task_schedule ([[TASK_ID]], [[IMPLICIT_TASK_ID]]) would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], 
exit_frame=[[EXIT]], reen + + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/tasks/task_in_joinbarrier.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/task_in_joinbarrier.c @@ -0,0 +1,90 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + OMPT_WAIT(condition,1); + print_ids(0); + } + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // 
implicit barrier parallel + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/tasks/task_types.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/task_types.c @@ -0,0 +1,115 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include +#include + + +void print_task_type(int id) +{ + #pragma omp critical + { + int task_type; + char buffer[2048]; + ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL); + format_task_type(task_type, buffer); + printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type); + } +}; + +int main() +{ + //initial task + print_task_type(0); + + int x; + //implicit task + #pragma omp parallel num_threads(1) + { + print_task_type(1); + x++; + } + + #pragma omp parallel num_threads(2) + #pragma omp master + { + //explicit task + #pragma omp task + { + print_task_type(2); + x++; + } + + //explicit task with undeferred + #pragma omp task if(0) + { + print_task_type(3); + x++; + } + + //explicit task with untied + #pragma omp task untied + { + print_task_type(4); + x++; + } + + //explicit task with final + #pragma omp task final(1) + { + print_task_type(5); + x++; + //nested explicit task with final and undeferred + #pragma omp task + { + print_task_type(6); + x++; + } + } + + //TODO:not working + //explicit task with mergeable + /* + #pragma omp task mergeable if((int)sin(0)) + { + print_task_type(7); + x++; + } + */ + + //TODO: merged task + } + + + + // Check if libomp supports the callbacks for this test. 
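+ // The task_type values in the CHECK lines below are the sums of the basic type
+ // (ompt_task_initial=0x1, ompt_task_implicit=0x2, ompt_task_explicit=0x4) and the
+ // flag bits ompt_task_undeferred=0x08000000, ompt_task_untied=0x10000000 and
+ // ompt_task_final=0x20000000, e.g. 134217732 = ompt_task_explicit|ompt_task_undeferred.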
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK-NOT: 0: parallel_data initially not null + // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1 + // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit=4 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_untied=268435460, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_untied=268435460 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_final=536870916, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_final=536870916 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + // ___CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + // ___CHECK-DAG: {{^[0-9]+}}: id=7 task_type=ompt_task_explicit=4 + + return 0; +} Index: runtime/test/ompt/tasks/task_types_serialized.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/task_types_serialized.c @@ -0,0 +1,112 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include + +void print_task_type(int id) +{ + #pragma omp critical + { + int task_type; + char buffer[2048]; + ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL); + format_task_type(task_type, buffer); + printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type); + } +}; + +int main() +{ + //initial task + print_task_type(0); + + int x; + //implicit task + #pragma omp parallel num_threads(1) + { 
+ print_task_type(1); + x++; + } + + #pragma omp parallel num_threads(1) + #pragma omp master + { + //explicit task + #pragma omp task + { + print_task_type(2); + x++; + } + + //explicit task with undeferred + #pragma omp task if(0) + { + print_task_type(3); + x++; + } + + //explicit task with untied + #pragma omp task untied + { + print_task_type(4); + x++; + } + + //explicit task with final + #pragma omp task final(1) + { + print_task_type(5); + x++; + //nested explicit task with final and undeferred + #pragma omp task + { + print_task_type(6); + x++; + } + } + +/* + //TODO:not working + //explicit task with mergeable + #pragma omp task mergeable + { + print_task_type(7); + x++; + } +*/ + + //TODO: merged task + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1 + // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188, has_dependences=no + // CHECK: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + // ___CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, 
task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // ___CHECK: {{^[0-9]+}}: id=7 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + return 0; +} Index: runtime/test/ompt/tasks/taskyield.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/taskyield.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, taskyield +#include "callback.h" +#include +#include + +int main() +{ + int condition=0, x=0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { +// #pragma omp task shared(condition) +// { + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + } + OMPT_WAIT(condition,1); + #pragma omp task shared(x) + { + x++; + } + printf("%" PRIu64 ": before yield\n", ompt_get_thread_data()->value); + #pragma omp taskyield + printf("%" PRIu64 ": after yield\n", ompt_get_thread_data()->value); + OMPT_SIGNAL(condition); +// } + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[0-9]+}}, thread_num={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[WORKER_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[MAIN_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1 + + + + + + return 0; +} Index: runtime/test/ompt/tasks/untied_task.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/untied_task.c @@ -0,0 +1,107 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task untied shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + 
#pragma omp task if(0) + { + print_ids(0); + print_ids(1); + print_ids(2); + } + print_ids(0); + print_ids(1); + print_ids(2); + } + OMPT_WAIT(condition,1); + print_ids(0); + } + #pragma omp barrier + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // explicit barrier after master + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: 
{{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // this is expected to come earlier and at MASTER: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/worksharing/for/auto_split.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/for/auto_split.c @@ -0,0 +1,8 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// GCC doesn't call runtime for auto = static schedule +// XFAIL: gcc + +#define SCHEDULE auto +#include "base_split.h" Index: runtime/test/ompt/worksharing/for/base.h =================================================================== --- runtime/test/ompt/worksharing/for/base.h +++ runtime/test/ompt/worksharing/for/base.h @@ -9,28 +9,35 @@ for (i = 0; i < 4; i++) { } + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}} + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: 
ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/worksharing/for/base_serialized.h =================================================================== --- runtime/test/ompt/worksharing/for/base_serialized.h +++ runtime/test/ompt/worksharing/for/base_serialized.h @@ -8,14 +8,21 @@ #pragma omp parallel for num_threads(1) schedule(SCHEDULE) for (i = 0; i < 1; i++) { } + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker={{.+}} + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}} - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[PARALLEL_ID,0]}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/worksharing/for/base_split.h =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/for/base_split.h @@ -0,0 +1,66 @@ +#include "callback.h" +#include + +/* With the combined parallel-for construct (base.h), the return-addresses are hard to compare. + With the separate parallel and for-nowait construct, the addresses become more predictable, + but the begin of the for-loop still generates additional code, so the offset of loop-begin + to the label is >4 Byte. 
+*/ + +int main() +{ + unsigned int i; + + #pragma omp parallel num_threads(4) + { + print_current_address(0); + #pragma omp for schedule(SCHEDULE) nowait + for (i = 0; i < 4; i++) { + print_fuzzy_address(1); + } + print_fuzzy_address(2); + } + print_fuzzy_address(3); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[PARALLEL_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, invoker={{[0-9]+}}, codeptr_ra=[[PARALLEL_RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[PARALLEL_RETURN_ADDRESS]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]] + + + // CHECK-LOOP: 0: NULL_POINTER=[[NULL:.*$]] + // CHECK-LOOP: 0: ompt_event_runtime_shutdown + // CHECK-LOOP: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra={{0x[0-f]+}}, invoker={{[0-9]+}} + // CHECK-LOOP: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + 
// CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]] + + + return 0; +} Index: runtime/test/ompt/worksharing/for/dynamic_split.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/for/dynamic_split.c @@ -0,0 +1,6 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt + +#define SCHEDULE dynamic +#include "base_split.h" Index: runtime/test/ompt/worksharing/for/guided_split.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/for/guided_split.c @@ -0,0 +1,6 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt + +#define SCHEDULE guided +#include "base_split.h" Index: runtime/test/ompt/worksharing/for/runtime_split.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/for/runtime_split.c @@ -0,0 +1,6 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt + +#define SCHEDULE runtime +#include "base_split.h" Index: runtime/test/ompt/worksharing/for/static_split.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/for/static_split.c @@ -0,0 +1,8 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// GCC doesn't call runtime for static schedule +// XFAIL: gcc + +#define SCHEDULE static +#include "base_split.h" Index: runtime/test/ompt/worksharing/sections.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/sections.c @@ -0,0 +1,33 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include <omp.h> + +int main() +{ + #pragma omp parallel sections num_threads(2) + { + #pragma omp section + { + printf("%lu: section 1\n", ompt_get_thread_data()->value); + } + #pragma omp section + { + printf("%lu: section 2\n", ompt_get_thread_data()->value); + } + } + + // Check if libomp supports the callbacks for this test.
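+ // Each of the two threads reports a sections_begin/sections_end pair with the same
+ // codeptr_ra, so the CHECK lines below reuse the captured [[SECT_BEGIN]] and
+ // [[SECT_END]] values for the second thread.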
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN:0x[0-f]+]], count=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END:0x[0-f]+]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN]], count=2 + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END]] + + + return 0; +} Index: runtime/test/ompt/worksharing/single.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/single.c @@ -0,0 +1,33 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt, single_callback +#include "callback.h" +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp single + { + x++; + } + } + + printf("x=%d\n", x); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_single_in_block_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1 + // CHECK: {{^}}[[THREAD_ID_1]]: ompt_event_single_in_block_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1 + + // CHECK: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_single_others_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1 + // CHECK: {{^}}[[THREAD_ID_2]]: ompt_event_single_others_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1 + + + + return 0; +}