Index: runtime/.clang-format =================================================================== --- runtime/.clang-format +++ runtime/.clang-format @@ -2,94 +2,4 @@ BasedOnStyle: LLVM AlignTrailingComments: false SortIncludes: false -AlignOperands: false -DisableFormat: true -KeepEmptyLinesAtTheStartOfBlocks: false -MaxEmptyLinesToKeep: 2 --- -# Language: Cpp -# AccessModifierOffset: -2 -# AlignAfterOpenBracket: Align -# AlignConsecutiveAssignments: false -# AlignConsecutiveDeclarations: false -# AlignEscapedNewlinesLeft: false -# AlignOperands: false -# AlignTrailingComments: false -# AllowAllParametersOfDeclarationOnNextLine: true -# AllowShortBlocksOnASingleLine: false -# AllowShortCaseLabelsOnASingleLine: false -# AllowShortFunctionsOnASingleLine: All -# AllowShortIfStatementsOnASingleLine: false -# AllowShortLoopsOnASingleLine: false -# AlwaysBreakAfterDefinitionReturnType: None -# AlwaysBreakAfterReturnType: None -# AlwaysBreakBeforeMultilineStrings: false -# AlwaysBreakTemplateDeclarations: false -# BinPackArguments: true -# BinPackParameters: true -# BraceWrapping: -# AfterClass: false -# AfterControlStatement: false -# AfterEnum: false -# AfterFunction: false -# AfterNamespace: false -# AfterObjCDeclaration: false -# AfterStruct: false -# AfterUnion: false -# BeforeCatch: false -# BeforeElse: false -# IndentBraces: false -# # BreakBeforeBinaryOperators: None -# BreakBeforeBraces: Attach -# BreakBeforeTernaryOperators: true -# BreakConstructorInitializersBeforeComma: false -# ColumnLimit: 80 -# CommentPragmas: '^ IWYU pragma:' -# ConstructorInitializerAllOnOneLineOrOnePerLine: false -# ConstructorInitializerIndentWidth: 4 -# ContinuationIndentWidth: 4 -# Cpp11BracedListStyle: true -# DerivePointerAlignment: false -# DisableFormat: true -# ExperimentalAutoDetectBinPacking: false -# ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] -# IncludeCategories: -# - Regex: '^"(llvm|llvm-c|clang|clang-c)/' -# Priority: 2 -# - Regex: '^(<|"(gtest|isl|json)/)' -# Priority: 3 -# - Regex: '.*' -# Priority: 1 -# IndentCaseLabels: false -# IndentWidth: 2 -# IndentWrappedFunctionNames: false -# KeepEmptyLinesAtTheStartOfBlocks: false -# MacroBlockBegin: '' -# MacroBlockEnd: '' -# MaxEmptyLinesToKeep: 2 -# NamespaceIndentation: None -# ObjCBlockIndentWidth: 2 -# ObjCSpaceAfterProperty: false -# ObjCSpaceBeforeProtocolList: true -# PenaltyBreakBeforeFirstCallParameter: 19 -# PenaltyBreakComment: 300 -# PenaltyBreakFirstLessLess: 120 -# PenaltyBreakString: 1000 -# PenaltyExcessCharacter: 1000000 -# PenaltyReturnTypeOnItsOwnLine: 60 -# PointerAlignment: Right -# ReflowComments: true -# SpaceAfterCStyleCast: false -# SpaceBeforeAssignmentOperators: true -# SpaceBeforeParens: ControlStatements -# SpaceInEmptyParentheses: false -# SpacesBeforeTrailingComments: 1 -# SpacesInAngles: false -# SpacesInContainerLiterals: true -# SpacesInCStyleCastParentheses: false -# SpacesInParentheses: false -# SpacesInSquareBrackets: false -# Standard: Cpp11 -# TabWidth: 8 -# UseTab: Never -... 
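The CMakeLists.txt, exports_so.txt, and include/50/omp.h.var diffs that follow collapse the old LIBOMP_OMPT_BLAME/LIBOMP_OMPT_TRACE switches into a single LIBOMP_OMPT_OPTIONAL option, export the new ompt_start_tool symbol, and declare the OpenMP 5.0 omp_control_tool() entry point together with its command and result enums. As a quick orientation, here is a minimal sketch of application code driving an attached tool through that entry point; only the prototype int omp_control_tool(int, int, void*) and the enum values come from the header in this patch, the pause/resume pattern and the messages are illustrative, and it assumes a libomp built with LIBOMP_OMPT_SUPPORT enabled.

#include <omp.h>
#include <stdio.h>

int main(void) {
  /* Ask an attached tool to pause data collection around a phase that
     should not be traced; the modifier and argument are unused here. */
  int rc = omp_control_tool(omp_control_tool_pause, 0, NULL);
  if (rc == omp_control_tool_notool)
    printf("no OMPT tool attached\n");
  else if (rc == omp_control_tool_nocallback)
    printf("tool attached but registered no control_tool callback\n");

  #pragma omp parallel
  {
    /* ... work the tool should not record ... */
  }

  /* Resume collection; a cooperating tool returns omp_control_tool_success
     (or omp_control_tool_ignored if it chose not to act). */
  rc = omp_control_tool(omp_control_tool_start, 0, NULL);
  return 0;
}
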
Index: runtime/CMakeLists.txt =================================================================== --- runtime/CMakeLists.txt +++ runtime/CMakeLists.txt @@ -321,12 +321,11 @@ # OMPT-support set(LIBOMP_OMPT_DEBUG FALSE CACHE BOOL "Trace OMPT initialization?") +#after testing: turn on ompt support by default for OpenMP 5.0 and higher set(LIBOMP_OMPT_SUPPORT FALSE CACHE BOOL "OMPT-support?") -set(LIBOMP_OMPT_BLAME TRUE CACHE BOOL - "OMPT-blame?") -set(LIBOMP_OMPT_TRACE TRUE CACHE BOOL - "OMPT-trace?") +set(LIBOMP_OMPT_OPTIONAL TRUE CACHE BOOL + "OMPT-optional?") if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_HAVE_OMPT_SUPPORT)) libomp_error_say("OpenMP Tools Interface requested but not available in this implementation") endif() @@ -396,8 +395,7 @@ libomp_say("Use ITT notify -- ${LIBOMP_USE_ITT_NOTIFY}") libomp_say("Use OMPT-support -- ${LIBOMP_OMPT_SUPPORT}") if(${LIBOMP_OMPT_SUPPORT}) - libomp_say("Use OMPT-blame -- ${LIBOMP_OMPT_BLAME}") - libomp_say("Use OMPT-trace -- ${LIBOMP_OMPT_TRACE}") + libomp_say("Use OMPT-optional -- ${LIBOMP_OMPT_OPTIONAL}") endif() libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}") libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}") Index: runtime/src/exports_so.txt =================================================================== --- runtime/src/exports_so.txt +++ runtime/src/exports_so.txt @@ -25,8 +25,7 @@ # # OMPT API # - ompt_tool; # OMPT initialization interface - ompt_control; # OMPT control interface + ompt_start_tool; # OMPT start interface # icc drops weak attribute at linking step without the following line: Annotate*; # TSAN annotation Index: runtime/src/extractExternal.cpp =================================================================== --- runtime/src/extractExternal.cpp +++ runtime/src/extractExternal.cpp @@ -2,7 +2,6 @@ * extractExternal.cpp */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include #include #include Index: runtime/src/include/50/omp.h.var =================================================================== --- runtime/src/include/50/omp.h.var +++ runtime/src/include/50/omp.h.var @@ -182,6 +182,23 @@ extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void); extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void); + /* OpenMP 5.0 Tool Control */ + typedef enum omp_control_tool_result_t { + omp_control_tool_notool = -2, + omp_control_tool_nocallback = -1, + omp_control_tool_success = 0, + omp_control_tool_ignored = 1 + } omp_control_tool_result_t; + + typedef enum omp_control_tool_t { + omp_control_tool_start = 1, + omp_control_tool_pause = 2, + omp_control_tool_flush = 3, + omp_control_tool_end = 4 + } omp_control_tool_t; + + extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*); + # undef __KAI_KMPC_CONVENTION /* Warning: Index: runtime/src/include/50/omp_lib.h.var =================================================================== --- runtime/src/include/50/omp_lib.h.var +++ runtime/src/include/50/omp_lib.h.var @@ -29,6 +29,8 @@ integer, parameter :: kmp_size_t_kind = int_ptr_kind() integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() integer, parameter :: omp_lock_hint_kind = omp_integer_kind + integer, parameter :: omp_control_tool_kind = omp_integer_kind + integer, parameter :: omp_control_tool_result_kind = omp_integer_kind integer (kind=omp_integer_kind), parameter :: openmp_version = 
@LIBOMP_OMP_YEAR_MONTH@ integer (kind=omp_integer_kind), parameter :: kmp_version_major = @LIBOMP_VERSION_MAJOR@ @@ -57,6 +59,16 @@ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4 + + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1 + interface ! *** @@ -494,6 +506,13 @@ integer (kind=omp_lock_hint_kind), value :: hint end subroutine omp_init_nest_lock_with_hint + function omp_control_tool(command, modifier) bind(c) + import + integer (kind=omp_integer_kind) omp_control_tool + integer (kind=omp_control_tool_kind), value :: command + integer (kind=omp_control_tool_kind), value :: modifier + end function omp_control_tool + end interface !DIR$ IF DEFINED (__INTEL_OFFLOAD) Index: runtime/src/include/50/omp_lib.f.var =================================================================== --- runtime/src/include/50/omp_lib.f.var +++ runtime/src/include/50/omp_lib.f.var @@ -32,6 +32,8 @@ integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() integer, parameter :: kmp_cancel_kind = omp_integer_kind integer, parameter :: omp_lock_hint_kind = omp_integer_kind + integer, parameter :: omp_control_tool_kind = omp_integer_kind + integer, parameter :: omp_control_tool_result_kind = omp_integer_kind end module omp_lib_kinds @@ -518,6 +520,13 @@ integer (kind=omp_lock_hint_kind) hint end subroutine omp_init_nest_lock_with_hint + function omp_control_tool(command, modifier) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_control_tool + integer (kind=omp_control_tool_kind) command + integer (kind=omp_control_tool_kind) modifier + end function omp_control_tool + end interface !dec$ if defined(_WIN32) @@ -563,6 +572,7 @@ !dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation !dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority +!dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool !dec$ attributes alias:'omp_init_lock' :: omp_init_lock !dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -643,6 +653,7 @@ !dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation !dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORTY' :: omp_get_max_task_priority +!dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock !dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -739,6 +750,7 @@ !dec$ attributes alias:'omp_set_nest_lock_'::omp_set_nest_lock !dec$ attributes alias:'omp_unset_nest_lock_'::omp_unset_nest_lock !dec$ attributes alias:'omp_test_nest_lock_'::omp_test_nest_lock +!dec$ attributes alias:'omp_control_tool_'::omp_control_tool !dec$ attributes 
alias:'kmp_set_stacksize_'::kmp_set_stacksize !dec$ attributes alias:'kmp_set_stacksize_s_'::kmp_set_stacksize_s @@ -818,6 +830,7 @@ !dec$ attributes alias:'_omp_set_nest_lock_'::omp_set_nest_lock !dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock !dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock +!dec$ attributes alias:'_omp_control_tool_'::omp_control_tool !dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize !dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s Index: runtime/src/include/50/omp_lib.f90.var =================================================================== --- runtime/src/include/50/omp_lib.f90.var +++ runtime/src/include/50/omp_lib.f90.var @@ -28,6 +28,8 @@ integer, parameter :: kmp_affinity_mask_kind = c_intptr_t integer, parameter :: kmp_cancel_kind = omp_integer_kind integer, parameter :: omp_lock_hint_kind = omp_integer_kind + integer, parameter :: omp_control_tool_kind = omp_integer_kind + integer, parameter :: omp_control_tool_result_kind = omp_integer_kind end module omp_lib_kinds @@ -68,6 +70,16 @@ integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_rtm = 131072 integer (kind=omp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 262144 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_start = 1 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_pause = 2 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_flush = 3 + integer (kind=omp_control_tool_kind), parameter :: omp_control_tool_end = 4 + + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_notool = -2 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_nocallback = -1 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_success = 0 + integer (kind=omp_control_tool_result_kind), parameter :: omp_control_tool_ignored = 1 + interface ! 
*** @@ -519,6 +531,13 @@ integer (kind=omp_lock_hint_kind), value :: hint end subroutine omp_init_nest_lock_with_hint + function omp_control_tool(command, modifier) bind(c) + use omp_lib_kinds + integer (kind=omp_integer_kind) omp_control_tool + integer (kind=omp_control_tool_kind), value :: command + integer (kind=omp_control_tool_kind), value :: modifier + end function omp_control_tool + end interface end module omp_lib Index: runtime/src/include/50/ompt.h.var =================================================================== --- runtime/src/include/50/ompt.h.var +++ runtime/src/include/50/ompt.h.var @@ -10,6 +10,7 @@ *****************************************************************************/ #include +#include @@ -17,21 +18,28 @@ * iteration macros *****************************************************************************/ -#define FOREACH_OMPT_INQUIRY_FN(macro) \ - macro (ompt_enumerate_state) \ - \ - macro (ompt_set_callback) \ - macro (ompt_get_callback) \ - \ - macro (ompt_get_idle_frame) \ - macro (ompt_get_task_frame) \ - \ - macro (ompt_get_state) \ - \ - macro (ompt_get_parallel_id) \ - macro (ompt_get_parallel_team_size) \ - macro (ompt_get_task_id) \ - macro (ompt_get_thread_id) +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_states) \ + macro (ompt_enumerate_mutex_impls) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_info) \ + macro (ompt_get_task_info) \ + macro (ompt_get_thread_data) \ + macro (ompt_get_unique_id) \ + \ + macro(ompt_get_num_places) \ + macro(ompt_get_place_proc_ids) \ + macro(ompt_get_place_num) \ + macro(ompt_get_partition_place_nums) \ + macro(ompt_get_proc_id) \ + \ + macro(ompt_get_target_info) \ + macro(ompt_get_num_devices) #define FOREACH_OMPT_PLACEHOLDER_FN(macro) \ macro (ompt_idle) \ @@ -40,141 +48,107 @@ macro (ompt_task_wait) \ macro (ompt_mutex_wait) -#define FOREACH_OMPT_STATE(macro) \ +#define FOREACH_OMP_STATE(macro) \ \ - /* first */ \ - macro (ompt_state_first, 0x71) /* initial enumeration state */ \ + /* first available state */ \ + macro (omp_state_undefined, 0x102) /* undefined thread state */ \ \ /* work states (0..15) */ \ - macro (ompt_state_work_serial, 0x00) /* working outside parallel */ \ - macro (ompt_state_work_parallel, 0x01) /* working within parallel */ \ - macro (ompt_state_work_reduction, 0x02) /* performing a reduction */ \ + macro (omp_state_work_serial, 0x000) /* working outside parallel */ \ + macro (omp_state_work_parallel, 0x001) /* working within parallel */ \ + macro (omp_state_work_reduction, 0x002) /* performing a reduction */ \ \ - /* idle (16..31) */ \ - macro (ompt_state_idle, 0x10) /* waiting for work */ \ + /* barrier wait states (16..31) */ \ + macro (omp_state_wait_barrier, 0x010) /* waiting at a barrier */ \ + macro (omp_state_wait_barrier_implicit_parallel, 0x011) \ + /* implicit barrier at the end of parallel region */\ + macro (omp_state_wait_barrier_implicit_workshare, 0x012) \ + /* implicit barrier at the end of worksharing */ \ + macro (omp_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \ + macro (omp_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \ \ - /* overhead states (32..63) */ \ - macro (ompt_state_overhead, 0x20) /* overhead excluding wait states */ \ + /* task wait states (32..63) */ \ + macro (omp_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \ + macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \ \ - /* barrier wait states (64..79) 
*/ \ - macro (ompt_state_wait_barrier, 0x40) /* waiting at a barrier */ \ - macro (ompt_state_wait_barrier_implicit, 0x41) /* implicit barrier */ \ - macro (ompt_state_wait_barrier_explicit, 0x42) /* explicit barrier */ \ + /* mutex wait states (64..127) */ \ + macro (omp_state_wait_mutex, 0x040) \ + macro (omp_state_wait_lock, 0x041) /* waiting for lock */ \ + macro (omp_state_wait_critical, 0x042) /* waiting for critical */ \ + macro (omp_state_wait_atomic, 0x043) /* waiting for atomic */ \ + macro (omp_state_wait_ordered, 0x044) /* waiting for ordered */ \ \ - /* task wait states (80..95) */ \ - macro (ompt_state_wait_taskwait, 0x50) /* waiting at a taskwait */ \ - macro (ompt_state_wait_taskgroup, 0x51) /* waiting at a taskgroup */ \ + /* target wait states (128..255) */ \ + macro (omp_state_wait_target, 0x080) /* waiting for target region */ \ + macro (omp_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \ + macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */ \ \ - /* mutex wait states (96..111) */ \ - macro (ompt_state_wait_lock, 0x60) /* waiting for lock */ \ - macro (ompt_state_wait_nest_lock, 0x61) /* waiting for nest lock */ \ - macro (ompt_state_wait_critical, 0x62) /* waiting for critical */ \ - macro (ompt_state_wait_atomic, 0x63) /* waiting for atomic */ \ - macro (ompt_state_wait_ordered, 0x64) /* waiting for ordered */ \ - macro (ompt_state_wait_single, 0x6F) /* waiting for single region (non-standard!) */ \ + /* misc (256..511) */ \ + macro (omp_state_idle, 0x100) /* waiting for work */ \ + macro (omp_state_overhead, 0x101) /* overhead excluding wait states */ \ \ - /* misc (112..127) */ \ - macro (ompt_state_undefined, 0x70) /* undefined thread state */ + /* implementation-specific states (512..) 
*/ +#define FOREACH_OMPT_MUTEX_IMPL(macro) \ + macro (ompt_mutex_impl_unknown, 0) /* unknown implementatin */ \ + macro (ompt_mutex_impl_spin, 1) /* based on spin */ \ + macro (ompt_mutex_impl_queuing, 2) /* based on some fair policy */ \ + macro (ompt_mutex_impl_speculative, 3) /* based on HW-supported speculation */ + #define FOREACH_OMPT_EVENT(macro) \ \ /*--- Mandatory Events ---*/ \ - macro (ompt_event_parallel_begin, ompt_new_parallel_callback_t, 1) /* parallel begin */ \ - macro (ompt_event_parallel_end, ompt_end_parallel_callback_t, 2) /* parallel end */ \ - \ - macro (ompt_event_task_begin, ompt_new_task_callback_t, 3) /* task begin */ \ - macro (ompt_event_task_end, ompt_task_callback_t, 4) /* task destroy */ \ - \ - macro (ompt_event_thread_begin, ompt_thread_type_callback_t, 5) /* thread begin */ \ - macro (ompt_event_thread_end, ompt_thread_type_callback_t, 6) /* thread end */ \ + macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ + macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ \ - macro (ompt_event_control, ompt_control_callback_t, 7) /* support control calls */ \ + macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ + macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ \ - macro (ompt_event_runtime_shutdown, ompt_callback_t, 8) /* runtime shutdown */ \ - \ - /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ - macro (ompt_event_idle_begin, ompt_thread_callback_t, 9) /* begin idle state */ \ - macro (ompt_event_idle_end, ompt_thread_callback_t, 10) /* end idle state */ \ + macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ + macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ + macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ \ - macro (ompt_event_wait_barrier_begin, ompt_parallel_callback_t, 11) /* begin wait at barrier */ \ - macro (ompt_event_wait_barrier_end, ompt_parallel_callback_t, 12) /* end wait at barrier */ \ + macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ + macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op*/ \ + macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit*/ \ \ - macro (ompt_event_wait_taskwait_begin, ompt_parallel_callback_t, 13) /* begin wait at taskwait */ \ - macro (ompt_event_wait_taskwait_end, ompt_parallel_callback_t, 14) /* end wait at taskwait */ \ + macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ \ - macro (ompt_event_wait_taskgroup_begin, ompt_parallel_callback_t, 15) /* begin wait at taskgroup */\ - macro (ompt_event_wait_taskgroup_end, ompt_parallel_callback_t, 16) /* end wait at taskgroup */ \ + macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ + macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ \ - macro (ompt_event_release_lock, ompt_wait_callback_t, 17) /* lock release */ \ - macro (ompt_event_release_nest_lock_last, ompt_wait_callback_t, 18) /* last nest lock release */ \ - macro (ompt_event_release_critical, ompt_wait_callback_t, 19) /* critical release */ \ + /*--- Optional Events (blame shifting, ompt_event_unimplemented) ---*/ \ \ - macro (ompt_event_release_atomic, ompt_wait_callback_t, 20) /* atomic 
release */ \ + macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 14) /* sync region wait begin or end*/ \ \ - macro (ompt_event_release_ordered, ompt_wait_callback_t, 21) /* ordered release */ \ + macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 15) /* mutex released */ \ \ /*--- Optional Events (synchronous events, ompt_event_unimplemented) --- */ \ - macro (ompt_event_implicit_task_begin, ompt_parallel_callback_t, 22) /* implicit task begin */ \ - macro (ompt_event_implicit_task_end, ompt_parallel_callback_t, 23) /* implicit task end */ \ - \ - macro (ompt_event_initial_task_begin, ompt_parallel_callback_t, 24) /* initial task begin */ \ - macro (ompt_event_initial_task_end, ompt_parallel_callback_t, 25) /* initial task end */ \ - \ - macro (ompt_event_task_switch, ompt_task_pair_callback_t, 26) /* task switch */ \ - \ - macro (ompt_event_loop_begin, ompt_new_workshare_callback_t, 27) /* task at loop begin */ \ - macro (ompt_event_loop_end, ompt_parallel_callback_t, 28) /* task at loop end */ \ - \ - macro (ompt_event_sections_begin, ompt_new_workshare_callback_t, 29) /* task at sections begin */\ - macro (ompt_event_sections_end, ompt_parallel_callback_t, 30) /* task at sections end */ \ \ - macro (ompt_event_single_in_block_begin, ompt_new_workshare_callback_t, 31) /* task at single begin*/ \ - macro (ompt_event_single_in_block_end, ompt_parallel_callback_t, 32) /* task at single end */ \ + macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 16) /* report task dependences */\ + macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 17) /* report task dependence */\ \ - macro (ompt_event_single_others_begin, ompt_parallel_callback_t, 33) /* task at single begin */ \ - macro (ompt_event_single_others_end, ompt_parallel_callback_t, 34) /* task at single end */ \ + macro (ompt_callback_work, ompt_callback_work_t, 18) /* task at work begin or end*/\ \ - macro (ompt_event_workshare_begin, ompt_new_workshare_callback_t, 35) /* task at workshare begin */\ - macro (ompt_event_workshare_end, ompt_parallel_callback_t, 36) /* task at workshare end */ \ + macro (ompt_callback_master, ompt_callback_master_t, 19) /* task at master begin or end */\ \ - macro (ompt_event_master_begin, ompt_parallel_callback_t, 37) /* task at master begin */ \ - macro (ompt_event_master_end, ompt_parallel_callback_t, 38) /* task at master end */ \ + macro (ompt_callback_target_map, ompt_callback_target_map_t, 20) /* target map */ \ \ - macro (ompt_event_barrier_begin, ompt_parallel_callback_t, 39) /* task at barrier begin */ \ - macro (ompt_event_barrier_end, ompt_parallel_callback_t, 40) /* task at barrier end */ \ + macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 21) /* sync region begin or end */ \ \ - macro (ompt_event_taskwait_begin, ompt_parallel_callback_t, 41) /* task at taskwait begin */ \ - macro (ompt_event_taskwait_end, ompt_parallel_callback_t, 42) /* task at task wait end */ \ + macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 22) /* lock init */ \ + macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 23) /* lock destroy */ \ \ - macro (ompt_event_taskgroup_begin, ompt_parallel_callback_t, 43) /* task at taskgroup begin */\ - macro (ompt_event_taskgroup_end, ompt_parallel_callback_t, 44) /* task at taskgroup end */ \ + macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 24) /* mutex acquire */ \ + macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 25) /* mutex acquired */ \ \ - macro 
(ompt_event_release_nest_lock_prev, ompt_wait_callback_t, 45) /* prev nest lock release */ \ + macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 26) /* nest lock */ \ \ - macro (ompt_event_wait_lock, ompt_wait_callback_t, 46) /* lock wait */ \ - macro (ompt_event_wait_nest_lock, ompt_wait_callback_t, 47) /* nest lock wait */ \ - macro (ompt_event_wait_critical, ompt_wait_callback_t, 48) /* critical wait */ \ - macro (ompt_event_wait_atomic, ompt_wait_callback_t, 49) /* atomic wait */ \ - macro (ompt_event_wait_ordered, ompt_wait_callback_t, 50) /* ordered wait */ \ + macro (ompt_callback_flush, ompt_callback_flush_t, 27) /* after executing flush */ \ \ - macro (ompt_event_acquired_lock, ompt_wait_callback_t, 51) /* lock acquired */ \ - macro (ompt_event_acquired_nest_lock_first, ompt_wait_callback_t, 52) /* 1st nest lock acquired */ \ - macro (ompt_event_acquired_nest_lock_next, ompt_wait_callback_t, 53) /* next nest lock acquired*/ \ - macro (ompt_event_acquired_critical, ompt_wait_callback_t, 54) /* critical acquired */ \ - macro (ompt_event_acquired_atomic, ompt_wait_callback_t, 55) /* atomic acquired */ \ - macro (ompt_event_acquired_ordered, ompt_wait_callback_t, 56) /* ordered acquired */ \ - \ - macro (ompt_event_init_lock, ompt_wait_callback_t, 57) /* lock init */ \ - macro (ompt_event_init_nest_lock, ompt_wait_callback_t, 58) /* nest lock init */ \ - \ - macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ - macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ - \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ \ - \ - macro (ompt_event_task_dependences, ompt_task_dependences_callback_t, 69) /* report task dependences */\ - macro (ompt_event_task_dependence_pair, ompt_task_pair_callback_t, 70) /* report task dependence pair */ + macro (ompt_callback_cancel, ompt_callback_cancel_t, 28) /*cancel innermost binding region*/\ + macro (ompt_callback_idle, ompt_callback_idle_t, 29) /* begin or end idle state */\ @@ -186,18 +160,20 @@ * identifiers *---------------------*/ -typedef uint64_t ompt_thread_id_t; -#define ompt_thread_id_none ((ompt_thread_id_t) 0) /* non-standard */ +typedef uint64_t ompt_id_t; +#define ompt_id_none 0 -typedef uint64_t ompt_task_id_t; -#define ompt_task_id_none ((ompt_task_id_t) 0) /* non-standard */ +typedef union ompt_data_u { + uint64_t value; /* data initialized by runtime to unique id */ + void *ptr; /* pointer under tool control */ +} ompt_data_t; -typedef uint64_t ompt_parallel_id_t; -#define ompt_parallel_id_none ((ompt_parallel_id_t) 0) /* non-standard */ +static const ompt_data_t ompt_data_none = {0}; typedef uint64_t ompt_wait_id_t; -#define ompt_wait_id_none ((ompt_wait_id_t) 0) /* non-standard */ +static const ompt_wait_id_t ompt_wait_id_none = 0; +typedef void ompt_device_t; /*--------------------- * ompt_frame_t @@ -235,35 +211,44 @@ *---------------------*/ typedef enum { -#define ompt_state_macro(state, code) state = code, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -} ompt_state_t; +#define omp_state_macro(state, code) state = code, + FOREACH_OMP_STATE(omp_state_macro) +#undef omp_state_macro +} omp_state_t; /*--------------------- * runtime events *---------------------*/ -typedef enum { +typedef enum ompt_callbacks_e{ #define ompt_event_macro(event, callback, eventid) event = eventid, FOREACH_OMPT_EVENT(ompt_event_macro) #undef ompt_event_macro -} ompt_event_t; +} ompt_callbacks_t; /*--------------------- * set 
callback results *---------------------*/ -typedef enum { - ompt_set_result_registration_error = 0, - ompt_set_result_event_may_occur_no_callback = 1, - ompt_set_result_event_never_occurs = 2, - ompt_set_result_event_may_occur_callback_some = 3, - ompt_set_result_event_may_occur_callback_always = 4, +typedef enum ompt_set_result_e { + ompt_set_error = 0, + ompt_set_never = 1, + ompt_set_sometimes = 2, + ompt_set_sometimes_paired = 3, + ompt_set_always = 4 } ompt_set_result_t; +/*---------------------- + * mutex implementations + *----------------------*/ +typedef enum ompt_mutex_impl_e { +#define ompt_mutex_impl_macro(impl, code) impl = code, + FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro) +#undef ompt_mutex_impl_macro +} ompt_mutex_impl_t; + /***************************************************************************** * callback signatures @@ -273,14 +258,10 @@ typedef void (*ompt_interface_fn_t)(void); typedef ompt_interface_fn_t (*ompt_function_lookup_t)( - const char * /* entry point to look up */ + const char * /* entry point to look up */ ); /* threads */ -typedef void (*ompt_thread_callback_t) ( - ompt_thread_id_t thread_id /* ID of thread */ -); - typedef enum { ompt_thread_initial = 1, // start the enumeration at 1 ompt_thread_worker = 2, @@ -288,78 +269,262 @@ } ompt_thread_type_t; typedef enum { - ompt_invoker_program = 0, /* program invokes master task */ - ompt_invoker_runtime = 1 /* runtime invokes master task */ + ompt_invoker_program = 1, /* program invokes master task */ + ompt_invoker_runtime = 2 /* runtime invokes master task */ } ompt_invoker_t; -typedef void (*ompt_thread_type_callback_t) ( - ompt_thread_type_t thread_type, /* type of thread */ - ompt_thread_id_t thread_id /* ID of thread */ +typedef void (*ompt_callback_thread_begin_t) ( + ompt_thread_type_t thread_type, /* type of thread */ + ompt_data_t *thread_data /* data of thread */ +); + +typedef void (*ompt_callback_thread_end_t) ( + ompt_data_t *thread_data /* data of thread */ ); typedef void (*ompt_wait_callback_t) ( - ompt_wait_id_t wait_id /* wait id */ + ompt_wait_id_t wait_id /* wait data */ ); /* parallel and workshares */ -typedef void (*ompt_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id /* id of task */ +typedef enum ompt_scope_endpoint_e { + ompt_scope_begin = 1, + ompt_scope_end = 2 +} ompt_scope_endpoint_t; + + +/* implicit task */ +typedef void (*ompt_callback_implicit_task_t) ( + ompt_scope_endpoint_t endpoint, /* endpoint of implicit task */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of implicit task */ + unsigned int team_size, /* team size */ + unsigned int thread_num /* thread number of calling thread */ ); -typedef void (*ompt_new_workshare_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t parent_task_id, /* id of parent task */ - void *workshare_function /* pointer to outlined function */ +typedef void (*ompt_callback_parallel_begin_t) ( + ompt_data_t *parent_task_data, /* data of parent task */ + const ompt_frame_t *parent_frame, /* frame data of parent task */ + ompt_data_t *parallel_data, /* data of parallel region */ + unsigned int requested_team_size, /* requested number of threads in team */ + ompt_invoker_t invoker, /* invoker of master task */ + const void *codeptr_ra /* return address of runtime call */ ); -typedef void (*ompt_new_parallel_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t 
*parent_task_frame, /* frame data of parent task */ - ompt_parallel_id_t parallel_id, /* id of parallel region */ - uint32_t requested_team_size, /* number of threads in team */ - void *parallel_function, /* pointer to outlined function */ - ompt_invoker_t invoker /* who invokes master task? */ +typedef void (*ompt_callback_parallel_end_t) ( + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + ompt_invoker_t invoker, /* invoker of master task */ + const void *codeptr_ra /* return address of runtime call */ ); -typedef void (*ompt_end_parallel_callback_t) ( - ompt_parallel_id_t parallel_id, /* id of parallel region */ - ompt_task_id_t task_id, /* id of task */ - ompt_invoker_t invoker /* who invokes master task? */ +/* tasks */ +typedef enum ompt_task_type_e { + ompt_task_initial = 0x1, + ompt_task_implicit = 0x2, + ompt_task_explicit = 0x4, + ompt_task_target = 0x8, + ompt_task_undeferred = 0x8000000, + ompt_task_untied = 0x10000000, + ompt_task_final = 0x20000000, + ompt_task_mergeable = 0x40000000, + ompt_task_merged = 0x80000000 +} ompt_task_type_t; + +typedef enum ompt_task_status_e { + ompt_task_complete = 1, + ompt_task_yield = 2, + ompt_task_cancel = 3, + ompt_task_others = 4 +} ompt_task_status_t; + +typedef void (*ompt_callback_task_schedule_t) ( + ompt_data_t *prior_task_data, /* data of prior task */ + ompt_task_status_t prior_task_status, /* status of prior task */ + ompt_data_t *next_task_data /* data of next task */ ); -/* tasks */ -typedef void (*ompt_task_callback_t) ( - ompt_task_id_t task_id /* id of task */ +typedef void (*ompt_callback_task_create_t) ( + ompt_data_t *parent_task_data, /* data of parent task */ + const ompt_frame_t *parent_frame, /* frame data for parent task */ + ompt_data_t *new_task_data, /* data of created task */ + int type, /* type of created task */ + int has_dependences, /* created task has dependences */ + const void *codeptr_ra /* return address of runtime call */ ); -typedef void (*ompt_task_pair_callback_t) ( - ompt_task_id_t first_task_id, - ompt_task_id_t second_task_id +/* task dependences */ +typedef void (*ompt_callback_task_dependences_t) ( + ompt_data_t *task_data, /* data of task */ + const ompt_task_dependence_t *deps, /* dependences of task */ + int ndeps /* dependences count of task */ ); -typedef void (*ompt_new_task_callback_t) ( - ompt_task_id_t parent_task_id, /* id of parent task */ - ompt_frame_t *parent_task_frame, /* frame data for parent task */ - ompt_task_id_t new_task_id, /* id of created task */ - void *task_function /* pointer to outlined function */ +typedef void (*ompt_callback_task_dependence_t) ( + ompt_data_t *src_task_data, /* data of source task */ + ompt_data_t *sink_task_data /* data of sink task */ ); -/* task dependences */ -typedef void (*ompt_task_dependences_callback_t) ( - ompt_task_id_t task_id, /* ID of task with dependences */ - const ompt_task_dependence_t *deps,/* vector of task dependences */ - int ndeps /* number of dependences */ +/* target and device */ +typedef enum ompt_target_type_e { + ompt_target = 1, + ompt_target_enter_data = 2, + ompt_target_exit_data = 3, + ompt_target_update = 4 +} ompt_target_type_t; + +typedef void (*ompt_callback_target_t) ( + ompt_target_type_t kind, + ompt_scope_endpoint_t endpoint, + uint64_t device_num, + ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra ); -/* program */ -typedef void (*ompt_control_callback_t) ( - uint64_t command, /* command of control call */ - uint64_t modifier /* 
modifier of control call */ +typedef enum ompt_target_data_op_e { + ompt_target_data_alloc = 1, + ompt_target_data_transfer_to_dev = 2, + ompt_target_data_transfer_from_dev = 3, + ompt_target_data_delete = 4 +} ompt_target_data_op_t; + +typedef void (*ompt_callback_target_data_op_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id, + ompt_target_data_op_t optype, + void *host_addr, + void *device_addr, + size_t bytes ); -typedef void (*ompt_callback_t)(void); +typedef void (*ompt_callback_target_submit_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id +); +typedef void (*ompt_callback_target_map_t) ( + ompt_id_t target_id, + unsigned int nitems, + void **host_addr, + void **device_addr, + size_t *bytes, + unsigned int *mapping_flags +); + +typedef void (*ompt_callback_device_initialize_t) ( + uint64_t device_num, + const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation +); + +typedef void (*ompt_callback_device_finalize_t) ( + uint64_t device_num +); + +/* control_tool */ +typedef int (*ompt_callback_control_tool_t) ( + uint64_t command, /* command of control call */ + uint64_t modifier, /* modifier of control call */ + void *arg, /* argument of control call */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef enum ompt_mutex_kind_e { + ompt_mutex = 0x10, + ompt_mutex_lock = 0x11, + ompt_mutex_nest_lock = 0x12, + ompt_mutex_critical = 0x13, + ompt_mutex_atomic = 0x14, + ompt_mutex_ordered = 0x20 +} ompt_mutex_kind_t; + +typedef void (*ompt_callback_mutex_acquire_t) ( + ompt_mutex_kind_t kind, /* mutex kind */ + unsigned int hint, /* mutex hint */ + unsigned int impl, /* mutex implementation */ + ompt_wait_id_t wait_id, /* id of object being awaited */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_mutex_t) ( + ompt_mutex_kind_t kind, /* mutex kind */ + ompt_wait_id_t wait_id, /* id of object being awaited */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_nest_lock_t) ( + ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */ + ompt_wait_id_t wait_id, /* id of object being awaited */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_master_t) ( + ompt_scope_endpoint_t endpoint, /* endpoint of master region */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_idle_t) ( + ompt_scope_endpoint_t endpoint /* endpoint of idle time */ +); + +typedef enum ompt_work_type_e { + ompt_work_loop = 1, + ompt_work_sections = 2, + ompt_work_single_executor = 3, + ompt_work_single_other = 4, + ompt_work_workshare = 5, + ompt_work_distribute = 6, + ompt_work_taskloop = 7 +} ompt_work_type_t; + +typedef void (*ompt_callback_work_t) ( + ompt_work_type_t wstype, /* type of work region */ + ompt_scope_endpoint_t endpoint, /* endpoint of work region */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + uint64_t count, /* quantity of work */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef enum ompt_sync_region_kind_e { + ompt_sync_region_barrier = 1, + ompt_sync_region_taskwait = 2, + ompt_sync_region_taskgroup = 3 +} ompt_sync_region_kind_t; + +typedef void (*ompt_callback_sync_region_t) ( + ompt_sync_region_kind_t kind, /* kind of sync region */ + 
ompt_scope_endpoint_t endpoint, /* endpoint of sync region */ + ompt_data_t *parallel_data, /* data of parallel region */ + ompt_data_t *task_data, /* data of task */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef enum ompt_cancel_flag_e { + ompt_cancel_parallel = 0x1, + ompt_cancel_sections = 0x2, + ompt_cancel_do = 0x4, + ompt_cancel_taskgroup = 0x8, + ompt_cancel_activated = 0x10, + ompt_cancel_detected = 0x20, + ompt_cancel_discarded_task = 0x40 +} ompt_cancel_flag_t; + +typedef void (*ompt_callback_cancel_t) ( + ompt_data_t *task_data, /* data of task */ + int flags, /* cancel flags */ + const void *codeptr_ra /* return address of runtime call */ +); + +typedef void (*ompt_callback_flush_t) ( + ompt_data_t *thread_data, /* data of thread */ + const void *codeptr_ra /* return address of runtime call */ +); /**************************************************************************** * ompt API @@ -381,33 +546,48 @@ ***************************************************************************/ /* state */ -OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( - ompt_wait_id_t *ompt_wait_id +OMPT_API_FUNCTION(omp_state_t, ompt_get_state, ( + ompt_wait_id_t *wait_id )); /* thread */ -OMPT_API_FUNCTION(ompt_thread_id_t, ompt_get_thread_id, (void)); - -OMPT_API_FUNCTION(void *, ompt_get_idle_frame, (void)); +OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void)); /* parallel region */ -OMPT_API_FUNCTION(ompt_parallel_id_t, ompt_get_parallel_id, ( - int ancestor_level +OMPT_API_FUNCTION(int, ompt_get_parallel_info, ( + int ancestor_level, + ompt_data_t **parallel_data, + int *team_size )); -OMPT_API_FUNCTION(int, ompt_get_parallel_team_size, ( - int ancestor_level +/* task */ +OMPT_API_FUNCTION(int, ompt_get_task_info, ( + int ancestor_level, + int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num )); -/* task */ -OMPT_API_FUNCTION(ompt_task_id_t, ompt_get_task_id, ( - int depth +/* places */ +OMPT_API_FUNCTION(int, ompt_get_num_places, (void)); + +OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, ( + int place_num, + int ids_size, + int *ids )); -OMPT_API_FUNCTION(ompt_frame_t *, ompt_get_task_frame, ( - int depth +OMPT_API_FUNCTION(int, ompt_get_place_num, (void)); + +OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, ( + int place_nums_size, + int *place_nums )); +/* proc_id */ +OMPT_API_FUNCTION(int, ompt_get_proc_id, (void)); /**************************************************************************** @@ -445,25 +625,35 @@ * INITIALIZATION FUNCTIONS ***************************************************************************/ -OMPT_API_FUNCTION(void, ompt_initialize, ( +typedef struct ompt_fns_t ompt_fns_t; + +OMPT_API_FUNCTION(int, ompt_initialize, ( ompt_function_lookup_t ompt_fn_lookup, - const char *runtime_version, - unsigned int ompt_version + ompt_fns_t *fns )); +OMPT_API_FUNCTION(void, ompt_finalize, ( + ompt_fns_t *fns +)); + +struct ompt_fns_t { + ompt_initialize_t initialize; + ompt_finalize_t finalize; +}; /* initialization interface to be defined by tool */ -ompt_initialize_t ompt_tool(void); +#ifdef _WIN32 +__declspec(dllexport) +#endif +ompt_fns_t * ompt_start_tool( + unsigned int omp_version, + const char * runtime_version +); -typedef enum opt_init_mode_e { - ompt_init_mode_never = 0, - ompt_init_mode_false = 1, - ompt_init_mode_true = 2, - ompt_init_mode_always = 3 -} ompt_init_mode_t; +typedef void (*ompt_callback_t)(void); OMPT_API_FUNCTION(int, ompt_set_callback, ( - 
ompt_event_t event, + ompt_callbacks_t which, ompt_callback_t callback )); @@ -477,7 +667,7 @@ OMPT_API_FUNCTION(int, ompt_get_callback, ( - ompt_event_t event, + ompt_callbacks_t which, ompt_callback_t *callback )); @@ -487,29 +677,37 @@ * MISCELLANEOUS FUNCTIONS ***************************************************************************/ -/* control */ -// FIXME: remove workaround for clang -#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp declare target -#endif -void ompt_control( - uint64_t command, - uint64_t modifier -); -#if !defined(__clang__) && defined(_OPENMP) && (_OPENMP >= 201307) -#pragma omp end declare target -#endif - /* state enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_state, ( +OMPT_API_FUNCTION(int, ompt_enumerate_states, ( int current_state, int *next_state, const char **next_state_name )); +/* mutex implementation enumeration */ +OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, ( + int current_impl, + int *next_impl, + const char **next_impl_name +)); + +/* get_unique_id */ +OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void)); + #ifdef __cplusplus }; #endif -#endif +/**************************************************************************** + * TARGET + ***************************************************************************/ + + OMPT_API_FUNCTION(int, ompt_get_target_info, ( + uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id +)); + + OMPT_API_FUNCTION(int, ompt_get_num_devices, (void)); +#endif /* __OMPT__ */ Index: runtime/src/kmp.h =================================================================== --- runtime/src/kmp.h +++ runtime/src/kmp.h @@ -3,7 +3,6 @@ * kmp.h -- KPTS runtime header file. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -13,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_H #define KMP_H @@ -202,6 +200,10 @@ #define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140 #define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0 +#define KMP_IDENT_WORK_LOOP 0x200 // static loop +#define KMP_IDENT_WORK_SECTIONS 0x400 // sections +#define KMP_IDENT_WORK_DISTRIBUTE 0x800 // distribute + /*! * The ident structure that describes a source location. 
*/ @@ -800,6 +802,10 @@ extern int __kmp_hws_requested; extern int __kmp_hws_abs_flag; // absolute or per-item number requested +#if OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT +extern char const *__kmp_tool_libraries; +#endif // OMP_50_ENABLED && LIBOMP_OMPT_SUPPORT + /* ------------------------------------------------------------------------ */ #define KMP_PAD(type, sz) \ @@ -3316,7 +3322,7 @@ extern kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_data_t ompt_parallel_data, #endif kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); @@ -3324,7 +3330,7 @@ extern kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_id_t ompt_parallel_id, #endif kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); @@ -3364,9 +3370,6 @@ }; extern int __kmp_fork_call(ident_t *loc, int gtid, enum fork_context_e fork_context, kmp_int32 argc, -#if OMPT_SUPPORT - void *unwrapped_task, -#endif microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX Index: runtime/src/kmp_affinity.h =================================================================== --- runtime/src/kmp_affinity.h +++ runtime/src/kmp_affinity.h @@ -2,7 +2,6 @@ * kmp_affinity.h -- header for affinity management */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_AFFINITY_H #define KMP_AFFINITY_H Index: runtime/src/kmp_affinity.cpp =================================================================== --- runtime/src/kmp_affinity.cpp +++ runtime/src/kmp_affinity.cpp @@ -2,7 +2,6 @@ * kmp_affinity.cpp -- affinity management */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" #include "kmp_i18n.h" @@ -25,7 +23,6 @@ void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); } - void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { kmp_uint32 depth; // The test below is true if affinity is available, but set to "none". 
Need to Index: runtime/src/kmp_alloc.cpp =================================================================== --- runtime/src/kmp_alloc.cpp +++ runtime/src/kmp_alloc.cpp @@ -2,7 +2,6 @@ * kmp_alloc.cpp -- private/shared dynamic memory allocation and management */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_io.h" #include "kmp_wrapper_malloc.h" Index: runtime/src/kmp_atomic.h =================================================================== --- runtime/src/kmp_atomic.h +++ runtime/src/kmp_atomic.h @@ -2,7 +2,6 @@ * kmp_atomic.h - ATOMIC header file */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_ATOMIC_H #define KMP_ATOMIC_H @@ -363,19 +361,20 @@ static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck, kmp_int32 gtid) { -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_wait_atomic)((ompt_wait_id_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_atomic, 0, ompt_mutex_impl_queuing, (ompt_wait_id_t)lck, + OMPT_GET_RETURN_ADDRESS(0)); } #endif __kmp_acquire_queuing_lock(lck, gtid); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)( - (ompt_wait_id_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); } #endif } @@ -388,10 +387,10 @@ static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck, kmp_int32 gtid) { __kmp_release_queuing_lock(lck, gtid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_release_atomic)( - (ompt_wait_id_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); } #endif } Index: runtime/src/kmp_atomic.cpp =================================================================== --- runtime/src/kmp_atomic.cpp +++ runtime/src/kmp_atomic.cpp @@ -2,7 +2,6 @@ * kmp_atomic.cpp -- ATOMIC implementation routines */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp_atomic.h" #include "kmp.h" // TRUE, asm routines prototypes Index: runtime/src/kmp_barrier.cpp =================================================================== --- runtime/src/kmp_barrier.cpp +++ runtime/src/kmp_barrier.cpp @@ -2,7 +2,6 @@ * kmp_barrier.cpp */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,13 +11,14 @@ // 
//===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_wait_release.h" #include "kmp_itt.h" #include "kmp_os.h" #include "kmp_stats.h" - +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif #if KMP_MIC #include @@ -87,8 +87,7 @@ int nproc = this_thr->th.th_team_nproc; int i; // Don't have to worry about sleep bit here or atomic since team setting - kmp_uint64 new_state = - team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; + kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; // Collect all the worker team member threads. for (i = 1; i < nproc; ++i) { @@ -1228,8 +1227,9 @@ int status = 0; ident_t *loc = __kmp_threads[gtid]->th.th_ident; #if OMPT_SUPPORT - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + void *return_address; #endif KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid, @@ -1237,28 +1237,26 @@ ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - -#if OMPT_TRACE - if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_end)( - my_parallel_id, my_task_id); - } + if (ompt_enabled.enabled) { +#if OMPT_OPTIONAL + my_task_data = OMPT_CUR_TASK_DATA(this_thr); + my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); + return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); } -#endif - if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id, - my_task_id); + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); } #endif // It is OK to report the barrier state after the barrier begin callback. // According to the OMPT specification, a compliant implementation may // even delay reporting this state until the barrier begins to wait. 
- this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + this_thr->th.ompt_thread_info.state = omp_state_wait_barrier; } #endif @@ -1493,14 +1491,20 @@ __kmp_tid_from_gtid(gtid), status)); #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id, - my_task_id); + if (ompt_enabled.enabled) { +#if OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, my_parallel_data, + my_task_data, return_address); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, my_parallel_data, + my_task_data, return_address); } #endif - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + this_thr->th.ompt_thread_info.state = omp_state_work_parallel; } #endif ANNOTATE_BARRIER_END(&team->t.t_bar); @@ -1597,14 +1601,31 @@ ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); #if OMPT_SUPPORT -#if OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + if (ompt_enabled.enabled) { +#if OMPT_OPTIONAL + void *codeptr = NULL; + int ds_tid = this_thr->th.th_info.ds.ds_tid; + if (KMP_MASTER_TID(ds_tid) && + (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || + ompt_callbacks.ompt_callback(ompt_callback_sync_region))) + codeptr = team->t.ompt_team_info.master_return_address; + my_task_data = OMPT_CUR_TASK_DATA(this_thr); + my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, + my_task_data, codeptr); + } #endif - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit; + } #endif if (__kmp_tasking_mode == tskm_extra_barrier) { @@ -1762,20 +1783,6 @@ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); -#if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } -#endif - - // return to default state - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif ANNOTATE_BARRIER_END(&team->t.t_bar); } @@ -1873,6 +1880,39 @@ } } +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + if (this_thr->th.ompt_thread_info.state == + omp_state_wait_barrier_implicit) { + int ds_tid = this_thr->th.th_info.ds.ds_tid; + ompt_data_t *tId = (team) ? 
OMPT_CUR_TASK_DATA(this_thr) + : &(this_thr->th.ompt_thread_info.task_data); + this_thr->th.ompt_thread_info.state = omp_state_overhead; +#if OMPT_OPTIONAL + void *codeptr = NULL; + if (KMP_MASTER_TID(ds_tid) && + (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || + ompt_callbacks.ompt_callback(ompt_callback_sync_region))) + codeptr = team->t.ompt_team_info.master_return_address; + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } +#endif + if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, tId, 0, ds_tid); + } + // return to idle state + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } + } +#endif + // Early exit for reaping threads releasing forkjoin barrier if (TCR_4(__kmp_global.g.g_done)) { this_thr->th.th_task_team = NULL; Index: runtime/src/kmp_cancel.cpp =================================================================== --- runtime/src/kmp_cancel.cpp +++ runtime/src/kmp_cancel.cpp @@ -8,11 +8,13 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_i18n.h" #include "kmp_io.h" #include "kmp_str.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif #if OMP_40_ENABLED @@ -52,11 +54,25 @@ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32( &(this_team->t.t_cancel_request), cancel_noreq, cncl_kind); if (old == cancel_noreq || old == cncl_kind) { - // printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", - // this_team->t.t_cancel_request, - // &(this_team->t.t_cancel_request)); - // we do not have a cancellation request in this team or we do have - // one that matches the current request -> cancel +// we do not have a cancellation request in this team or we do have +// one that matches the current request -> cancel +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_cancel_flag_t type = ompt_cancel_parallel; + if (cncl_kind == cancel_parallel) + type = ompt_cancel_parallel; + else if (cncl_kind == cancel_loop) + type = ompt_cancel_do; + else if (cncl_kind == cancel_sections) + type = ompt_cancel_sections; + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, type | ompt_cancel_activated, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return 1 /* true */; } break; @@ -76,8 +92,18 @@ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32( &(taskgroup->cancel_request), cancel_noreq, cncl_kind); if (old == cancel_noreq || old == cncl_kind) { - // we do not have a cancellation request in this taskgroup or we do - // have one that matches the current request -> cancel +// we do not have a cancellation request in this taskgroup or we do +// have one that matches the current request -> cancel +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, ompt_cancel_taskgroup | ompt_cancel_activated, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return 1 /* true */; } } else { @@ 
-135,8 +161,25 @@ KMP_DEBUG_ASSERT(this_team); if (this_team->t.t_cancel_request) { if (cncl_kind == this_team->t.t_cancel_request) { - // the request in the team structure matches the type of - // cancellation point so we can cancel +// the request in the team structure matches the type of +// cancellation point so we can cancel +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_cancel_flag_t type = ompt_cancel_parallel; + if (cncl_kind == cancel_parallel) + type = ompt_cancel_parallel; + else if (cncl_kind == cancel_loop) + type = ompt_cancel_do; + else if (cncl_kind == cancel_sections) + type = ompt_cancel_sections; + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, type | ompt_cancel_detected, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return 1 /* true */; } KMP_ASSERT(0 /* false */); @@ -159,7 +202,18 @@ taskgroup = task->td_taskgroup; if (taskgroup) { - // return the current status of cancellation for the taskgroup +// return the current status of cancellation for the taskgroup +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_cancel && + !!taskgroup->cancel_request) { + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, + NULL); + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, ompt_cancel_taskgroup | ompt_cancel_detected, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif return !!taskgroup->cancel_request; } else { // if a cancellation point is encountered by a task that does not Index: runtime/src/kmp_config.h.cmake =================================================================== --- runtime/src/kmp_config.h.cmake +++ runtime/src/kmp_config.h.cmake @@ -45,10 +45,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMP_OMPT_BLAME -#define OMPT_BLAME LIBOMP_OMPT_BLAME -#cmakedefine01 LIBOMP_OMPT_TRACE -#define OMPT_TRACE LIBOMP_OMPT_TRACE +#cmakedefine01 LIBOMP_OMPT_OPTIONAL +#define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS #define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS #define KMP_DEBUG_ADAPTIVE_LOCKS 0 Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ runtime/src/kmp_csupport.cpp @@ -2,7 +2,6 @@ * kmp_csupport.cpp -- kfront linkage support for OpenMP. 
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "omp.h" /* extern "C" declarations of user-visible routines */ #include "kmp.h" #include "kmp_error.h" @@ -280,7 +278,7 @@ #if OMPT_SUPPORT ompt_frame_t *ompt_frame; - if (ompt_enabled) { + if (ompt_enabled.enabled) { kmp_info_t *master_th = __kmp_threads[gtid]; kmp_team_t *parent_team = master_th->th.th_team; ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; @@ -291,7 +289,8 @@ ompt_frame = &( parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); } - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -299,9 +298,6 @@ SSC_MARK_FORKING(); #endif __kmp_fork_call(loc, gtid, fork_context_intel, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task -#endif VOLATILE_CAST(microtask_t) microtask, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_task_func, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ @@ -373,11 +369,11 @@ #if OMPT_SUPPORT kmp_team_t *parent_team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(gtid); - if (ompt_enabled) { + if (ompt_enabled.enabled) { parent_team->t.t_implicit_task_taskdata[tid] - .ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(1); + .ompt_task_info.frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif // check if __kmpc_push_num_teams called, set default number of teams @@ -390,9 +386,6 @@ KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); __kmp_fork_call(loc, gtid, fork_context_intel, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task -#endif VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, @@ -435,9 +428,12 @@ when the condition is false. */ void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { - // The implementation is now in kmp_runtime.cpp so that it can share static - // functions with kmp_fork_call since the tasks to be done are similar in - // each case. +// The implementation is now in kmp_runtime.cpp so that it can share static +// functions with kmp_fork_call since the tasks to be done are similar in +// each case. 
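The hunks above and below repeat one idiom at almost every __kmpc_* entry point: before handing off to the shared runtime code, the entry point publishes its caller's frame (OMPT_GET_FRAME_ADDRESS) and stashes the user-visible return address (OMPT_STORE_RETURN_ADDRESS), so callbacks raised deeper in the runtime can report the application call site rather than an internal frame. The following is only a distilled sketch of that bookkeeping under assumed semantics; the example_* names and thread-local slots are hypothetical stand-ins, not the real macro definitions.

#include <stddef.h>

/* Hypothetical per-thread slots standing in for the storage behind
 * OMPT_STORE_RETURN_ADDRESS/OMPT_LOAD_RETURN_ADDRESS and for the
 * reenter_runtime_frame field updated in the surrounding hunks. */
static __thread void *example_return_address;
static __thread void *example_reenter_frame;

static void example_runtime_entry_point(void) {
  /* record the call site and the entry frame before going deeper */
  example_return_address = __builtin_return_address(0);
  example_reenter_frame = __builtin_frame_address(0);
  /* ... shared runtime work; a callback raised from here can pass
   * example_return_address as its codeptr_ra argument ... */
  example_reenter_frame = NULL; /* cleared again on the way out */
}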
+#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif __kmp_serialized_parallel(loc, global_tid); } @@ -484,6 +480,30 @@ KMP_DEBUG_ASSERT(serial_team->t.t_threads); KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); +#if OMPT_SUPPORT + if (ompt_enabled.enabled && + this_thr->th.ompt_thread_info.state != omp_state_overhead) { + OMPT_CUR_TASK_INFO(this_thr)->frame.exit_runtime_frame = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, + __kmp_tid_from_gtid(global_tid)); + } + + // reset clear the task id only after unlinking the task + ompt_data_t *parent_task_data; + __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); + + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, + ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); + } + __ompt_lw_taskteam_unlink(this_thr); + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } +#endif + /* If necessary, pop the internal control stack values and replace the team * values */ top = serial_team->t.t_control_stack_top; @@ -556,6 +576,12 @@ if (__kmp_env_consistency_check) __kmp_pop_parallel(global_tid, NULL); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + this_thr->th.ompt_thread_info.state = + ((this_thr->th.th_team_serialized) ? omp_state_work_serial + : omp_state_work_parallel); +#endif } /*! @@ -619,6 +645,13 @@ #else #error Unknown or unsupported architecture #endif + +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_flush) { + ompt_callbacks.ompt_callback(ompt_callback_flush)( + __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); + } +#endif } /* -------------------------------------------------------------------------- */ @@ -644,12 +677,13 @@ __kmp_check_barrier(global_tid, ct_barrier, loc); } -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT ompt_frame_t *ompt_frame; - if (ompt_enabled) { - ompt_frame = __ompt_get_task_frame_internal(0); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->reenter_runtime_frame == NULL) - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif __kmp_threads[global_tid]->th.th_ident = loc; @@ -661,8 +695,8 @@ // 4) no sync is required __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { ompt_frame->reenter_runtime_frame = NULL; } #endif @@ -689,16 +723,17 @@ status = 1; } -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL if (status) { - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_begin)) { + if (ompt_enabled.ompt_callback_master) { kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(global_tid); - ompt_callbacks.ompt_callback(ompt_event_master_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + ompt_callbacks.ompt_callback(ompt_callback_master)( + ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 
OMPT_GET_RETURN_ADDRESS(0)); } } #endif @@ -734,14 +769,15 @@ KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); KMP_POP_PARTITIONED_TIMER(); -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_end)) { + if (ompt_enabled.ompt_callback_master) { int tid = __kmp_tid_from_gtid(global_tid); - ompt_callbacks.ompt_callback(ompt_event_master_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + ompt_callbacks.ompt_callback(ompt_callback_master)( + ompt_scope_end, &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + OMPT_GET_RETURN_ADDRESS(0)); } #endif @@ -778,16 +814,24 @@ th = __kmp_threads[gtid]; -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + kmp_team_t *team; + ompt_wait_id_t lck; + void *codeptr_ra; + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + team = __kmp_team_from_gtid(gtid); + lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value; /* OMPT state update */ - th->th.ompt_thread_info.wait_id = (uint64_t)loc; - th->th.ompt_thread_info.state = ompt_state_wait_ordered; + th->th.ompt_thread_info.wait_id = lck; + th->th.ompt_thread_info.state = omp_state_wait_ordered; /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_wait_ordered)( - th->th.ompt_thread_info.wait_id); + codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_ordered, omp_lock_hint_none, ompt_mutex_impl_spin, + (ompt_wait_id_t)lck, codeptr_ra); } } #endif @@ -797,16 +841,16 @@ else __kmp_parallel_deo(>id, &cid, loc); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { /* OMPT state update */ - th->th.ompt_thread_info.state = ompt_state_work_parallel; + th->th.ompt_thread_info.state = omp_state_work_parallel; th->th.ompt_thread_info.wait_id = 0; /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)( - th->th.ompt_thread_info.wait_id); + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra); } } #endif @@ -841,11 +885,13 @@ else __kmp_parallel_dxo(>id, &cid, loc); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - th->th.ompt_thread_info.wait_id); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_ordered, + (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value, + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif } @@ -1065,11 +1111,18 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) { #if KMP_USE_DYNAMIC_LOCK +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); +#endif // OMPT_SUPPORT __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); #else KMP_COUNT_BLOCK(OMP_CRITICAL); 
KMP_TIME_PARTITIONED_BLOCK( OMP_critical_wait); /* Time spent waiting to enter the critical section */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + omp_state_t prev_state = omp_state_undefined; + ompt_thread_info_t ti; +#endif kmp_user_lock_p lck; KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); @@ -1103,6 +1156,25 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(global_tid); + void *codeptr_ra = NULL; + if (ompt_enabled.enabled) { + ti = __kmp_threads[global_tid]->th.ompt_thread_info; + /* OMPT state update */ + prev_state = ti.state; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = omp_state_wait_critical; + + /* OMPT event callback */ + codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)crit, codeptr_ra); + } + } +#endif // Value of 'crit' should be good for using as a critical_id of the critical // section directive. __kmp_acquire_user_lock_with_checks(lck, global_tid); @@ -1110,6 +1182,19 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquired(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + /* OMPT state update */ + ti.state = prev_state; + ti.wait_id = 0; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra); + } + } +#endif KMP_START_EXPLICIT_TIMER(OMP_critical); KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); @@ -1162,6 +1247,76 @@ return __kmp_user_lock_seq; } +#if OMPT_SUPPORT && OMPT_OPTIONAL +static ompt_mutex_impl_t +__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { + if (user_lock) { + switch (KMP_EXTRACT_D_TAG(user_lock)) { + case 0: + break; +#if KMP_USE_FUTEX + case locktag_futex: + return ompt_mutex_impl_queuing; +#endif + case locktag_tas: + return ompt_mutex_impl_spin; +#if KMP_USE_TSX + case locktag_hle: + return ompt_mutex_impl_speculative; +#endif + default: + return ompt_mutex_impl_unknown; + } + ilock = KMP_LOOKUP_I_LOCK(user_lock); + } + KMP_ASSERT(ilock); + switch (ilock->type) { +#if KMP_USE_TSX + case locktag_adaptive: + case locktag_rtm: + return ompt_mutex_impl_speculative; +#endif + case locktag_nested_tas: + return ompt_mutex_impl_spin; +#if KMP_USE_FUTEX + case locktag_nested_futex: +#endif + case locktag_ticket: + case locktag_queuing: + case locktag_drdpa: + case locktag_nested_ticket: + case locktag_nested_queuing: + case locktag_nested_drdpa: + return ompt_mutex_impl_queuing; + default: + return ompt_mutex_impl_unknown; + } +} + +// For locks without dynamic binding +static ompt_mutex_impl_t __ompt_get_mutex_impl_type() { + switch (__kmp_user_lock_kind) { + case lk_tas: + return ompt_mutex_impl_spin; +#if KMP_USE_FUTEX + case lk_futex: +#endif + case lk_ticket: + case lk_queuing: + case lk_drdpa: + return ompt_mutex_impl_queuing; +#if KMP_USE_TSX + case lk_hle: + case lk_rtm: + case lk_adaptive: + return ompt_mutex_impl_speculative; +#endif + default: + return ompt_mutex_impl_unknown; + } +} +#endif + /*! @ingroup WORK_SHARING @param loc source location information.
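For reviewers new to the callback set used above: contended acquisition is reported as a pair of events, mutex_acquire before blocking and mutex_acquired once the lock is held, and the runtime passes the mutex kind, the lock hint, the implementation flavor resolved by __ompt_get_mutex_impl_type(), a wait id identifying the lock, and the code pointer recovered via OMPT_LOAD_RETURN_ADDRESS/OMPT_GET_RETURN_ADDRESS. A minimal tool-side sketch of matching receivers follows; the example_* names are hypothetical, and plain integer types stand in for the ompt_* typedefs whose authoritative definitions come from ompt.h.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t example_wait_id_t; /* stands in for ompt_wait_id_t */

/* Receiver shaped like the ompt_callback_mutex_acquire dispatch above:
 * (kind, hint, impl, wait_id, codeptr_ra). */
static void example_on_mutex_acquire(int kind, unsigned int hint,
                                     unsigned int impl,
                                     example_wait_id_t wait_id,
                                     const void *codeptr_ra) {
  /* kind is an ompt_mutex_* value, impl an ompt_mutex_impl_* value */
  printf("waiting for mutex %llu (impl %u) from %p\n",
         (unsigned long long)wait_id, impl, codeptr_ra);
  (void)kind;
  (void)hint;
}

/* Receiver shaped like ompt_callback_mutex_acquired: (kind, wait_id, codeptr_ra). */
static void example_on_mutex_acquired(int kind, example_wait_id_t wait_id,
                                      const void *codeptr_ra) {
  printf("acquired mutex %llu from %p\n", (unsigned long long)wait_id,
         codeptr_ra);
  (void)kind;
}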
@@ -1179,6 +1334,14 @@ kmp_critical_name *crit, uintptr_t hint) { KMP_COUNT_BLOCK(OMP_CRITICAL); kmp_user_lock_p lck; +#if OMPT_SUPPORT && OMPT_OPTIONAL + omp_state_t prev_state = omp_state_undefined; + ompt_thread_info_t ti; + // This is the case, if called from __kmpc_critical: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); +#endif KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); @@ -1205,6 +1368,22 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquiring(lck); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ti = __kmp_threads[global_tid]->th.ompt_thread_info; + /* OMPT state update */ + prev_state = ti.state; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = omp_state_wait_critical; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_critical, (unsigned int)hint, + __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr); + } + } +#endif #if KMP_USE_INLINED_TAS if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { KMP_ACQUIRE_TAS_LOCK(lck, global_tid); @@ -1227,12 +1406,41 @@ #if USE_ITT_BUILD __kmp_itt_critical_acquiring(lck); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ti = __kmp_threads[global_tid]->th.ompt_thread_info; + /* OMPT state update */ + prev_state = ti.state; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = omp_state_wait_critical; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_critical, (unsigned int)hint, + __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr); + } + } +#endif KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); } #if USE_ITT_BUILD __kmp_itt_critical_acquired(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + /* OMPT state update */ + ti.state = prev_state; + ti.wait_id = 0; + + /* OMPT event callback */ + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr); + } + } +#endif KMP_PUSH_PARTITIONED_TIMER(OMP_critical); KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); @@ -1319,14 +1527,18 @@ // section directive. 
__kmp_release_user_lock_with_checks(lck, global_tid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_critical)) { - ompt_callbacks.ompt_callback(ompt_event_release_critical)((uint64_t)lck); +#endif // KMP_USE_DYNAMIC_LOCK + +#if OMPT_SUPPORT && OMPT_OPTIONAL + /* OMPT release event triggers after lock is released; place here to trigger + * for all #if branches */ + OMPT_STORE_RETURN_ADDRESS(global_tid); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0)); } #endif -#endif // KMP_USE_DYNAMIC_LOCK KMP_POP_PARTITIONED_TIMER(); KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); } @@ -1351,10 +1563,24 @@ if (__kmp_env_consistency_check) __kmp_check_barrier(global_tid, ct_barrier, loc); +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif return (status != 0) ? 0 : 1; } @@ -1399,10 +1625,24 @@ __kmp_check_barrier(global_tid, ct_barrier, loc); } +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif ret = __kmpc_master(loc, global_tid); @@ -1445,26 +1685,33 @@ KMP_PUSH_PARTITIONED_TIMER(OMP_single); } -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(global_tid); - if (ompt_enabled) { + if (ompt_enabled.enabled) { if (rc) { - if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id, - team->t.ompt_team_info.microtask); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_executor, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); } } else { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + 
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_end, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); } - this_thr->th.ompt_thread_info.state = ompt_state_wait_single; } } #endif @@ -1485,16 +1732,17 @@ __kmp_exit_single(global_tid); KMP_POP_PARTITIONED_TIMER(); -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(global_tid); - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_executor, ompt_scope_end, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, + OMPT_GET_RETURN_ADDRESS(0)); } #endif } @@ -1509,12 +1757,28 @@ void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_work_type_t ompt_work_type; ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_loop_end)(team_info->parallel_id, - task_info->task_id); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + // Determine workshare type + if (loc != NULL) { + if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { + ompt_work_type = ompt_work_loop; + } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { + ompt_work_type = ompt_work_sections; + } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { + ompt_work_type = ompt_work_distribute; + } else { + KMP_ASSERT2(0, + "__kmpc_for_static_fini: can't determine workshare type"); + } + KMP_DEBUG_ASSERT(ompt_work_type); + } + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_end, &(team_info->parallel_data), + &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); } #endif @@ -1711,6 +1975,15 @@ if (didit) *data_ptr = cpy_data; +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif /* This barrier is not a barrier region boundary */ #if USE_ITT_NOTIFY __kmp_threads[gtid]->th.th_ident = loc; @@ -1723,11 +1996,21 @@ // Consider next barrier a user-visible barrier for barrier region boundaries // Nesting checks are already handled by the single construct checks +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 
// tasks can overwrite the location) #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } /* -------------------------------------------------------------------------- */ @@ -1814,6 +2097,19 @@ } __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_lock, (omp_lock_hint_t)hint, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif } /* initialize the lock with a hint */ @@ -1825,6 +2121,19 @@ } __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_nest_lock, (omp_lock_hint_t)hint, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif } #endif // KMP_USE_DYNAMIC_LOCK @@ -1839,6 +2148,19 @@ } __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif + #else // KMP_USE_DYNAMIC_LOCK static char const *const func = "omp_init_lock"; @@ -1869,9 +2191,15 @@ INIT_LOCK(lck); __kmp_set_user_lock_location(lck, loc); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -1892,6 +2220,19 @@ } __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif + #else // KMP_USE_DYNAMIC_LOCK static char const *const func = "omp_init_nest_lock"; @@ -1925,9 +2266,15 @@ INIT_NESTED_LOCK(lck); __kmp_set_user_lock_location(lck, loc); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && 
ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_init) { + ompt_callbacks.ompt_callback(ompt_callback_lock_init)( + ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -1950,6 +2297,22 @@ } __kmp_itt_lock_destroyed(lck); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + kmp_user_lock_p lck; + if (KMP_EXTRACT_D_TAG(user_lock) == 0) { + lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; + } else { + lck = (kmp_user_lock_p)user_lock; + } + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); #else kmp_user_lock_p lck; @@ -1968,9 +2331,14 @@ lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); } -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2003,6 +2371,16 @@ kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); __kmp_itt_lock_destroyed(ilk->lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); #else // KMP_USE_DYNAMIC_LOCK @@ -2025,10 +2403,14 @@ lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); } -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_lock_destroy) { + ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2065,6 +2447,18 @@ (kmp_user_lock_p) user_lock); // itt function will get to the right lock object. 
#endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif #if KMP_USE_INLINED_TAS if (tag == locktag_tas && !__kmp_env_consistency_check) { KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); @@ -2080,6 +2474,12 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif #else // KMP_USE_DYNAMIC_LOCK @@ -2102,6 +2502,17 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)lck, codeptr); + } +#endif ACQUIRE_LOCK(lck, gtid); @@ -2109,9 +2520,10 @@ __kmp_itt_lock_acquired(lck); #endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -2124,14 +2536,41 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); #endif - KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } + } +#endif + int acquire_status = + KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); #if USE_ITT_BUILD __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); #endif -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - // missing support here: need to know whether acquired first or not +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } + } else { + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); + } + } } #endif @@ -2158,6 +2597,19 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // 
This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); + } + } +#endif ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); @@ -2165,16 +2617,20 @@ __kmp_itt_lock_acquired(lck); #endif /* USE_ITT_BUILD */ -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { - if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)( - (uint64_t)lck); + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); + } } else { - if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)( - (uint64_t)lck); + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); + } } } #endif @@ -2202,6 +2658,17 @@ __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); } +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif + #else // KMP_USE_DYNAMIC_LOCK kmp_user_lock_p lck; @@ -2219,6 +2686,18 @@ #endif /* USE_ITT_BUILD */ TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); KMP_MB(); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); + } +#endif + return; #else lck = (kmp_user_lock_p)user_lock; @@ -2240,9 +2719,14 @@ RELEASE_LOCK(lck, gtid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_lock)) { - ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t)lck); +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_released) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -2256,7 +2740,28 @@ #if USE_ITT_BUILD __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); #endif - KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); + int release_status = + KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = 
OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (release_status == KMP_LOCK_RELEASED) { + if (ompt_enabled.ompt_callback_mutex_released) { + // release_lock_last + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } + } else if (ompt_enabled.ompt_callback_nest_lock) { + // release_lock_prev + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr); + } + } +#endif #else // KMP_USE_DYNAMIC_LOCK @@ -2274,10 +2779,39 @@ #if USE_ITT_BUILD __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); #endif /* USE_ITT_BUILD */ + +#if OMPT_SUPPORT && OMPT_OPTIONAL + int release_status = KMP_LOCK_STILL_HELD; +#endif + if (--(tl->lk.depth_locked) == 0) { TCW_4(tl->lk.poll, 0); +#if OMPT_SUPPORT && OMPT_OPTIONAL + release_status = KMP_LOCK_RELEASED; +#endif } KMP_MB(); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { + if (release_status == KMP_LOCK_RELEASED) { + if (ompt_enabled.ompt_callback_mutex_released) { + // release_lock_last + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); + } + } else if (ompt_enabled.ompt_callback_nest_lock) { + // release_lock_previous + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_end, (ompt_wait_id_t)lck, codeptr); + } + } +#endif + return; #else lck = (kmp_user_lock_p)user_lock; @@ -2300,17 +2834,22 @@ int release_status; release_status = RELEASE_NESTED_LOCK(lck, gtid); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled) { if (release_status == KMP_LOCK_RELEASED) { - if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)( - (uint64_t)lck); + if (ompt_enabled.ompt_callback_mutex_released) { + // release_lock_last + ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); } - } else if (ompt_callbacks.ompt_callback( - ompt_event_release_nest_lock_prev)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)( - (uint64_t)lck); + } else if (ompt_enabled.ompt_callback_nest_lock) { + // release_lock_previous + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_end, (ompt_wait_id_t)lck, codeptr); } } #endif @@ -2328,6 +2867,18 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif #if KMP_USE_INLINED_TAS if (tag == locktag_tas && !__kmp_env_consistency_check) { KMP_TEST_TAS_LOCK(user_lock, gtid, rc); @@ -2344,6 +2895,12 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); #endif
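The nested-lock set/unset paths above and the test variants that follow all make the same decision: a first acquisition or a final release is reported through mutex_acquired/mutex_released with ompt_mutex_nest_lock, while a re-acquisition or a partial release is reported through ompt_callback_nest_lock with a scope endpoint. A condensed sketch of that selection is shown below; the example_* helpers are hypothetical, the booleans correspond to the runtime's KMP_LOCK_ACQUIRED_FIRST and KMP_LOCK_RELEASED checks, and the real callback dispatches are reduced to comments.

/* Condensed sketch only; not the runtime's code. */
static void example_report_nest_lock_set(int acquired_first) {
  if (acquired_first) {
    /* depth 0 -> 1: mutex_acquired(ompt_mutex_nest_lock, wait_id, codeptr) */
  } else {
    /* depth n -> n+1: nest_lock(ompt_scope_begin, wait_id, codeptr) */
  }
}

static void example_report_nest_lock_unset(int released_last) {
  if (released_last) {
    /* depth 1 -> 0: mutex_released(ompt_mutex_nest_lock, wait_id, codeptr) */
  } else {
    /* depth n -> n-1: nest_lock(ompt_scope_end, wait_id, codeptr) */
  }
}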
+#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); + } +#endif return FTN_TRUE; } else { #if USE_ITT_BUILD @@ -2374,6 +2931,17 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), + (ompt_wait_id_t)lck, codeptr); + } +#endif rc = TEST_LOCK(lck, gtid); #if USE_ITT_BUILD @@ -2383,6 +2951,13 @@ __kmp_itt_lock_cancelled(lck); } #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (rc && ompt_enabled.ompt_callback_mutex_acquired) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); + } +#endif + return (rc ? FTN_TRUE : FTN_FALSE); /* Can't use serial interval since not block structured */ @@ -2397,6 +2972,18 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, + codeptr); + } +#endif rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); #if USE_ITT_BUILD if (rc) { @@ -2405,6 +2992,23 @@ __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); } #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled && rc) { + if (rc == 1) { + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); + } + } else { + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next + ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); + } + } + } +#endif return rc; #else // KMP_USE_DYNAMIC_LOCK @@ -2432,6 +3036,19 @@ __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + // This is the case, if called from omp_init_lock_with_hint: + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + if (ompt_enabled.enabled && + ompt_enabled.ompt_callback_mutex_acquire) { + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( + ompt_mutex_nest_lock, omp_lock_hint_none, + __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); + } +#endif + rc = TEST_NESTED_LOCK(lck, gtid); #if USE_ITT_BUILD if (rc) { @@ -2440,6 +3057,23 @@ __kmp_itt_lock_cancelled(lck); } #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled && rc) { + if (rc == 1) { + if (ompt_enabled.ompt_callback_mutex_acquired) { + // lock_first + ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); + } + } else { + if (ompt_enabled.ompt_callback_nest_lock) { + // lock_next +
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( + ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); + } + } + } +#endif return rc; /* Can't use serial interval since not block structured */ @@ -2699,6 +3333,19 @@ // this barrier should be invisible to a customer and to the threading profile // tool (it's neither a terminating barrier nor customer's code, it's // used for an internal purpose) +#if OMPT_SUPPORT + // JP: can this barrier potentially lead to task scheduling? + // JP: as long as there is a barrier in the implementation, OMPT should and + // will provide the barrier events + // so we set-up the necessary frame/return addresses. + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif @@ -2706,6 +3353,11 @@ __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), global_tid, FALSE, reduce_size, reduce_data, reduce_func); retval = (retval != 0) ? (0) : (1); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif // all other workers except master should do this pop here // ( none of other workers will get to __kmpc_end_reduce_nowait() ) @@ -2861,6 +3513,15 @@ // case tree_reduce_block: // this barrier should be visible to a customer and to the threading profile // tool (it's a terminating barrier on constructs if NOWAIT not specified) +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames @@ -2869,6 +3530,11 @@ __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), global_tid, TRUE, reduce_size, reduce_data, reduce_func); retval = (retval != 0) ?
(0) : (1); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif // all other workers except master should do this pop here // ( none of other workers except master will enter __kmpc_end_reduce() ) @@ -2918,28 +3584,70 @@ __kmp_end_critical_section_reduce_block(loc, global_tid, lck); // TODO: implicit barrier: should be exposed +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } else if (packed_reduction_method == empty_reduce_block) { // usage: if team size==1, no synchronization is required (Intel platforms only) // TODO: implicit barrier: should be exposed +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } else if (packed_reduction_method == atomic_reduce_block) { +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(global_tid); + } +#endif // TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; #endif __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) { Index: runtime/src/kmp_debug.h =================================================================== --- runtime/src/kmp_debug.h +++ runtime/src/kmp_debug.h @@ -2,7 +2,6 @@ * kmp_debug.h -- debug / assertion code for Assure library */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_DEBUG_H #define KMP_DEBUG_H Index: runtime/src/kmp_debug.cpp =================================================================== --- runtime/src/kmp_debug.cpp +++ runtime/src/kmp_debug.cpp @@ -2,7 +2,6 @@ * kmp_debug.cpp -- debug utilities for the Guide library */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_debug.h" /* really necessary? 
*/ #include "kmp_i18n.h" Index: runtime/src/kmp_debugger.h =================================================================== --- runtime/src/kmp_debugger.h +++ runtime/src/kmp_debugger.h @@ -3,7 +3,6 @@ * kmp_debugger.h -- debugger support. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -13,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_DEBUGGER_H #define KMP_DEBUGGER_H Index: runtime/src/kmp_debugger.cpp =================================================================== --- runtime/src/kmp_debugger.cpp +++ runtime/src/kmp_debugger.cpp @@ -3,7 +3,6 @@ * kmp_debugger.cpp -- debugger support. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -13,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_lock.h" #include "kmp_omp.h" Index: runtime/src/kmp_dispatch.cpp =================================================================== --- runtime/src/kmp_dispatch.cpp +++ runtime/src/kmp_dispatch.cpp @@ -2,7 +2,6 @@ * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - /* Dynamic scheduling initialization and dispatch. * * NOTE: __kmp_nth is a constant inside of any dispatch loop, however @@ -1232,12 +1230,16 @@ } #endif // ( KMP_STATIC_STEAL_ENABLED ) -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + kmp_info_t *thr = __kmp_threads[gtid]; + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), + tc, + OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif } @@ -1392,16 +1394,18 @@ /* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini() is not called. 
*/ -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL #define OMPT_LOOP_END \ if (status == 0) { \ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \ + if (ompt_enabled.ompt_callback_work) { \ ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \ - ompt_callbacks.ompt_callback(ompt_event_loop_end)( \ - team_info->parallel_id, task_info->task_id); \ + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \ + ompt_callbacks.ompt_callback(ompt_callback_work)( \ + ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \ + &(task_info->task_data), 0, codeptr); \ } \ } +// TODO: implement count #else #define OMPT_LOOP_END // no-op #endif @@ -1409,7 +1413,12 @@ template static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, - typename traits_t::signed_t *p_st) { + typename traits_t::signed_t *p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + void *codeptr +#endif + ) { typedef typename traits_t::unsigned_t UT; typedef typename traits_t::signed_t ST; @@ -1978,8 +1987,8 @@ pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default // use dynamic-style shcedule // atomically inrement iterations, get old value - init = test_then_add( - RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunkspec); + init = test_then_add(RCAST(volatile ST *, &sh->u.s.iteration), + (ST)chunkspec); remaining = trip - init; if (remaining <= 0) { status = 0; // all iterations got by other threads @@ -2057,8 +2066,8 @@ if ((T)remaining < pr->u.p.parm2) { // use dynamic-style shcedule // atomically inrement iterations, get old value - init = test_then_add( - RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunk); + init = test_then_add(RCAST(volatile ST *, &sh->u.s.iteration), + (ST)chunk); remaining = trip - init; if (remaining <= 0) { status = 0; // all iterations got by other threads @@ -2529,6 +2538,9 @@ enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } /*! 
@@ -2538,6 +2550,9 @@ enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2548,6 +2563,9 @@ enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2558,6 +2576,9 @@ enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2575,6 +2596,9 @@ kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2584,6 +2608,9 @@ kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2593,6 +2620,9 @@ kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2602,6 +2632,9 @@ kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) { KMP_DEBUG_ASSERT(__kmp_init_serial); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } @@ -2621,7 +2654,15 @@ */ int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! @@ -2630,7 +2671,15 @@ int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! @@ -2638,7 +2687,15 @@ */ int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! 
@@ -2647,7 +2704,15 @@ int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st) { - return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_LOAD_RETURN_ADDRESS(gtid) +#endif + ); } /*! Index: runtime/src/kmp_environment.h =================================================================== --- runtime/src/kmp_environment.h +++ runtime/src/kmp_environment.h @@ -2,7 +2,6 @@ * kmp_environment.h -- Handle environment varoiables OS-independently. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_ENVIRONMENT_H #define KMP_ENVIRONMENT_H Index: runtime/src/kmp_environment.cpp =================================================================== --- runtime/src/kmp_environment.cpp +++ runtime/src/kmp_environment.cpp @@ -2,7 +2,6 @@ * kmp_environment.cpp -- Handle environment variables OS-independently. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - /* We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv()) unavailable. getenv() apparently gets a clean copy of Index: runtime/src/kmp_error.h =================================================================== --- runtime/src/kmp_error.h +++ runtime/src/kmp_error.h @@ -2,7 +2,6 @@ * kmp_error.h -- PTS functions for error checking at runtime. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_ERROR_H #define KMP_ERROR_H Index: runtime/src/kmp_error.cpp =================================================================== --- runtime/src/kmp_error.cpp +++ runtime/src/kmp_error.cpp @@ -2,7 +2,6 @@ * kmp_error.cpp -- KPTS functions for error checking at runtime */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_error.h" #include "kmp_i18n.h" Index: runtime/src/kmp_ftn_cdecl.cpp =================================================================== --- runtime/src/kmp_ftn_cdecl.cpp +++ runtime/src/kmp_ftn_cdecl.cpp @@ -2,7 +2,6 @@ * kmp_ftn_cdecl.cpp -- Fortran __cdecl linkage support for OpenMP. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" Index: runtime/src/kmp_ftn_entry.h =================================================================== --- runtime/src/kmp_ftn_entry.h +++ runtime/src/kmp_ftn_entry.h @@ -2,7 +2,6 @@ * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP. 
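The pattern above repeats across all four __kmpc_dispatch_next_* entry points: the outermost runtime entry records the caller's return address per thread, and the templated worker later receives it through the new codeptr parameter so the loop-end work callback can report a code pointer. The following stand-alone sketch imitates that store/load handshake; the demo_* names and macros are hypothetical stand-ins for the real OMPT_STORE_RETURN_ADDRESS / OMPT_LOAD_RETURN_ADDRESS machinery in the OMPT headers, and this is an illustration of the mechanism, not code from the patch.

#include <cstdio>

// Hypothetical stand-ins for the per-thread return-address slot that the
// real OMPT_STORE_RETURN_ADDRESS / OMPT_LOAD_RETURN_ADDRESS macros manage.
static void *demo_return_address[256];
#define DEMO_STORE_RETURN_ADDRESS(gtid) \
  (demo_return_address[gtid] = __builtin_return_address(0))
#define DEMO_LOAD_RETURN_ADDRESS(gtid) (demo_return_address[gtid])

// Inner worker: receives the code pointer explicitly, as __kmp_dispatch_next
// now does via its extra codeptr argument.
static int demo_dispatch_next(int gtid, void *codeptr) {
  // ...advance the worksharing loop; on the last chunk a tool callback could
  // be fired with `codeptr` identifying the user call site.
  std::printf("T#%d loop end observed at %p\n", gtid, codeptr);
  return 0;
}

// Outermost entry point: store first, then forward the loaded value.
int demo_kmpc_dispatch_next(int gtid) {
  DEMO_STORE_RETURN_ADDRESS(gtid);
  return demo_dispatch_next(gtid, DEMO_LOAD_RETURN_ADDRESS(gtid));
}

int main() { return demo_kmpc_dispatch_next(0); }

Storing at the entry point and loading inside the worker keeps the extra argument out of the hot path when OMPT_OPTIONAL is disabled, which is why the parameter is guarded by OMPT_SUPPORT && OMPT_OPTIONAL.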
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef FTN_STDCALL #error The support file kmp_ftn_entry.h should not be compiled by itself. #endif @@ -23,6 +21,10 @@ #include "kmp_i18n.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -342,6 +344,26 @@ #endif } +#if OMP_50_ENABLED +int FTN_STDCALL FTN_CONTROL_TOOL(uint64_t command, uint64_t modifier, + void *arg) { +#if defined(KMP_STUB) || !OMPT_SUPPORT + return -2; +#else + OMPT_STORE_RETURN_ADDRESS(__kmp_entry_gtid()); + if (!TCR_4(__kmp_init_middle)) { + return -2; + } + kmp_info_t *this_thr = __kmp_threads[__kmp_entry_gtid()]; + ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr); + parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + int ret = __kmp_control_tool(command, modifier, arg); + parent_task_info->frame.reenter_runtime_frame = 0; + return ret; +#endif +} +#endif + int FTN_STDCALL xexpand(FTN_GET_THREAD_NUM)(void) { #ifdef KMP_STUB return 0; @@ -567,7 +589,6 @@ #if OMP_40_ENABLED - kmp_proc_bind_t FTN_STDCALL xexpand(FTN_GET_PROC_BIND)(void) { #ifdef KMP_STUB return __kmps_get_proc_bind(); @@ -876,8 +897,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock, - KMP_DEREF hint); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint); #endif } @@ -886,8 +910,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_nest_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock, - KMP_DEREF hint); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_nest_lock_with_hint(NULL, gtid, user_lock, KMP_DEREF hint); #endif } #endif @@ -897,7 +924,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_lock(NULL, gtid, user_lock); #endif } @@ -906,7 +937,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_init_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_init_nest_lock(NULL, gtid, user_lock); #endif } @@ -914,7 +949,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNINIT; #else - __kmpc_destroy_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_destroy_lock(NULL, gtid, user_lock); #endif } @@ -922,7 +961,11 @@ #ifdef KMP_STUB *((kmp_stub_lock_t *)user_lock) = UNINIT; #else - __kmpc_destroy_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_destroy_nest_lock(NULL, gtid, user_lock); #endif } @@ -936,7 +979,11 @@ } *((kmp_stub_lock_t *)user_lock) = LOCKED; #else - __kmpc_set_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + 
OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_set_lock(NULL, gtid, user_lock); #endif } @@ -947,7 +994,11 @@ } (*((int *)user_lock))++; #else - __kmpc_set_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_set_nest_lock(NULL, gtid, user_lock); #endif } @@ -961,7 +1012,11 @@ } *((kmp_stub_lock_t *)user_lock) = UNLOCKED; #else - __kmpc_unset_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_unset_lock(NULL, gtid, user_lock); #endif } @@ -975,7 +1030,11 @@ } (*((int *)user_lock))--; #else - __kmpc_unset_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_unset_nest_lock(NULL, gtid, user_lock); #endif } @@ -990,7 +1049,11 @@ *((kmp_stub_lock_t *)user_lock) = LOCKED; return 1; #else - return __kmpc_test_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmpc_test_lock(NULL, gtid, user_lock); #endif } @@ -1001,7 +1064,11 @@ } return ++(*((int *)user_lock)); #else - return __kmpc_test_nest_lock(NULL, __kmp_entry_gtid(), user_lock); + int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + return __kmpc_test_nest_lock(NULL, gtid, user_lock); #endif } @@ -1091,7 +1158,6 @@ /* ------------------------------------------------------------------------ */ - #if OMP_40_ENABLED /* returns the status of cancellation */ int FTN_STDCALL xexpand(FTN_GET_CANCELLATION)(void) { Index: runtime/src/kmp_ftn_extra.cpp =================================================================== --- runtime/src/kmp_ftn_extra.cpp +++ runtime/src/kmp_ftn_extra.cpp @@ -2,7 +2,6 @@ * kmp_ftn_extra.cpp -- Fortran 'extra' linkage support for OpenMP. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" Index: runtime/src/kmp_ftn_os.h =================================================================== --- runtime/src/kmp_ftn_os.h +++ runtime/src/kmp_ftn_os.h @@ -2,7 +2,6 @@ * kmp_ftn_os.h -- KPTS Fortran defines header file. 
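The lock wrappers in kmp_ftn_entry.h above all take the same shape after the change: __kmp_entry_gtid() is evaluated once into a local, the return address is recorded for that gtid, and the same gtid is handed to the __kmpc_ lock routine, so a tool can attribute the mutex event to the user call site. A minimal user-level program exercising those entry points (standard OpenMP lock API only, nothing here is specific to the patch; build with -fopenmp):

#include <omp.h>
#include <cstdio>

int main() {
  omp_lock_t lock;
  omp_init_lock(&lock);    // wrapper records this call site for tools
  int counter = 0;
#pragma omp parallel num_threads(4)
  {
    omp_set_lock(&lock);   // acquire/acquired events can carry a codeptr
    ++counter;
    omp_unset_lock(&lock);
  }
  omp_destroy_lock(&lock);
  std::printf("counter = %d\n", counter);
  return 0;
}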
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_FTN_OS_H #define KMP_FTN_OS_H @@ -135,6 +133,10 @@ #endif #endif +#if OMP_50_ENABLED +#define FTN_CONTROL_TOOL omp_control_tool +#endif + #endif /* KMP_FTN_PLAIN */ /* ------------------------------------------------------------------------ */ @@ -228,7 +230,6 @@ #define FTN_IS_INITIAL_DEVICE omp_is_initial_device_ #endif - #if OMP_40_ENABLED #define FTN_GET_CANCELLATION omp_get_cancellation_ #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_ @@ -254,6 +255,10 @@ #endif #endif +#if OMP_50_ENABLED +#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL +#endif + #endif /* KMP_FTN_APPEND */ /* ------------------------------------------------------------------------ */ @@ -347,7 +352,6 @@ #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE #endif - #if OMP_40_ENABLED #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS @@ -373,6 +377,10 @@ #endif #endif +#if OMP_50_ENABLED +#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL +#endif + #endif /* KMP_FTN_UPPER */ /* ------------------------------------------------------------------------ */ @@ -466,7 +474,6 @@ #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_ #endif - #if OMP_40_ENABLED #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_ #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_ @@ -492,6 +499,10 @@ #endif #endif +#if OMP_50_ENABLED +#define FTN_CONTROL_TOOL OMP_CONTROL_TOOL_ +#endif + #endif /* KMP_FTN_UAPPEND */ /* -------------------------- GOMP API NAMES ------------------------ */ Index: runtime/src/kmp_ftn_stdcall.cpp =================================================================== --- runtime/src/kmp_ftn_stdcall.cpp +++ runtime/src/kmp_ftn_stdcall.cpp @@ -2,7 +2,6 @@ * kmp_ftn_stdcall.cpp -- Fortran __stdcall linkage support for OpenMP. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" // Note: This string is not printed when KMP_VERSION=1. Index: runtime/src/kmp_global.cpp =================================================================== --- runtime/src/kmp_global.cpp +++ runtime/src/kmp_global.cpp @@ -2,7 +2,6 @@ * kmp_global.cpp -- KPTS global variables for runtime support library */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" @@ -127,7 +125,6 @@ int __kmp_allThreadsSpecified = 0; size_t __kmp_align_alloc = CACHE_LINE; - int __kmp_generate_warnings = kmp_warnings_low; int __kmp_reserve_warn = 0; int __kmp_xproc = 0; @@ -306,6 +303,10 @@ kmp_uint64 __kmp_taskloop_min_tasks = 0; #endif +#if OMP_50_ENABLED && OMPT_SUPPORT +char const *__kmp_tool_libraries = NULL; +#endif + /* This check ensures that the compiler is passing the correct data type for the flags formal parameter of the function kmpc_omp_task_alloc(). 
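With FTN_CONTROL_TOOL mapped onto the four Fortran name-mangling schemes above and the C entry point implemented in kmp_ftn_entry.h, user code can steer an attached tool through the OpenMP 5.0 tool-control API. A small usage sketch follows; it assumes an OpenMP 5.0 omp.h that declares omp_control_tool() and the omp_control_tool_* constants, and it only demonstrates return-value handling (a build without OMPT, or the stub library, reports omp_control_tool_notool, i.e. -2).

#include <omp.h>
#include <cstdio>

int main() {
  // Ask an attached tool to pause itself and later resume.
  int rc = omp_control_tool(omp_control_tool_pause, /*modifier=*/0, nullptr);
  if (rc == omp_control_tool_notool)
    std::printf("no tool attached (or OMPT disabled)\n");
  else
    std::printf("pause request returned %d\n", rc);

  omp_control_tool(omp_control_tool_start, 0, nullptr);
  return 0;
}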
If the type is not a 4-byte type, then give an error message about a non-positive length Index: runtime/src/kmp_gsupport.cpp =================================================================== --- runtime/src/kmp_gsupport.cpp +++ runtime/src/kmp_gsupport.cpp @@ -2,7 +2,6 @@ * kmp_gsupport.cpp */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_atomic.h" @@ -33,14 +31,20 @@ int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_barrier"); KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL ompt_frame_t *ompt_frame; - if (ompt_enabled) { - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); } #endif __kmpc_barrier(&loc, gtid); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } // Mutual exclusion @@ -58,6 +62,9 @@ int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_critical_start"); KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); } @@ -65,6 +72,9 @@ int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_critical_end"); KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); } @@ -113,7 +123,40 @@ // 3rd parameter == FALSE prevents kmp_enter_single from pushing a // workshare when USE_CHECKS is defined. We need to avoid the push, // as there is no corresponding GOMP_single_end() call. - return __kmp_enter_single(gtid, &loc, FALSE); + kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = this_thr->th.th_team; + int tid = __kmp_tid_from_gtid(gtid); + + if (ompt_enabled.enabled) { + if (rc) { + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_executor, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + } + } else { + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_begin, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_single_other, ompt_scope_end, + &(team->t.ompt_team_info.parallel_data), + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), + 1, OMPT_GET_RETURN_ADDRESS(0)); + } + } + } +#endif + + return rc; } void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { @@ -131,14 +174,33 @@ if (__kmp_enter_single(gtid, &loc, FALSE)) return NULL; - // Wait for the first thread to set the copyprivate data pointer, - // and for all other threads to reach this point. 
+// Wait for the first thread to set the copyprivate data pointer, +// and for all other threads to reach this point. + +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); // Retrieve the value of the copyprivate data point, and wait for all // threads to do likewise, then return. retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif return retval; } @@ -151,14 +213,35 @@ // continuing, so that the know that the copyprivate data pointer has been // propagated to all threads before trying to reuse the t_copypriv_data field. __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif } void xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) { int gtid = __kmp_entry_gtid(); MKLOC(loc, "GOMP_ordered_start"); KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_ordered(&loc, gtid); } @@ -166,6 +249,9 @@ int gtid = __kmp_get_gtid(); MKLOC(loc, "GOMP_ordered_end"); KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_ordered(&loc, gtid); } @@ -199,26 +285,26 @@ #if OMPT_SUPPORT kmp_info_t *thr; ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; + omp_state_t enclosing_state; - if (ompt_enabled) { + if (ompt_enabled.enabled) { // get pointer to thread data structure thr = __kmp_threads[*gtid]; // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif task(data); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // clear task frame ompt_frame->exit_runtime_frame = NULL; @@ -238,24 +324,29 @@ enum sched_type schedule, long start, long end, long incr, long chunk_size) { - // Intialize the loop worksharing construct. +// Intialize the loop worksharing construct. 
+ +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(*gtid); +#endif KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, schedule != kmp_sch_static); #if OMPT_SUPPORT kmp_info_t *thr; ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; + omp_state_t enclosing_state; - if (ompt_enabled) { + if (ompt_enabled.enabled) { thr = __kmp_threads[*gtid]; // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -263,7 +354,7 @@ task(data); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // clear task frame ompt_frame->exit_runtime_frame = NULL; @@ -287,11 +378,8 @@ va_list ap; va_start(ap, argc); - rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, -#if OMPT_SUPPORT - VOLATILE_CAST(void *) unwrapped_task, -#endif - wrapper, __kmp_invoke_task_func, + rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, + __kmp_invoke_task_func, #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX &ap #else @@ -306,18 +394,19 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { -#if OMPT_TRACE + int ompt_team_size; + if (ompt_enabled.enabled) { ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - team_info->parallel_id, task_info->task_id); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid)); } -#endif - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + thr->th.ompt_thread_info.state = omp_state_work_parallel; } #endif } @@ -325,47 +414,9 @@ static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *)) { #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id; - if (ompt_enabled) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - - ompt_parallel_id = __ompt_parallel_id_new(gtid); - - // parallel region callback - if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = 1; - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - task_info->task_id, &task_info->frame, ompt_parallel_id, team_size, - (void *)task, OMPT_INVOKER(fork_context_gnu)); - } - } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif - __kmp_serialized_parallel(loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - kmp_info_t *thr = __kmp_threads[gtid]; - - ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid); - - // set up lightweight task - ompt_lw_taskteam_t *lwt = - (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, thr, gtid, (void *)task, ompt_parallel_id); - lwt->ompt_task_info.task_id = my_ompt_task_id; - __ompt_lw_taskteam_link(lwt, thr); - -#if OMPT_TRACE - // implicit 
task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_ompt_task_id); - } - thr->th.ompt_thread_info.state = ompt_state_work_parallel; -#endif - } -#endif } void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, @@ -375,10 +426,11 @@ #if OMPT_SUPPORT ompt_frame_t *parent_frame, *frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_start"); @@ -396,9 +448,9 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { - frame = __ompt_get_task_frame_internal(0); - frame->exit_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); + frame->exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } #endif } @@ -406,44 +458,23 @@ void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { int gtid = __kmp_get_gtid(); kmp_info_t *thr; + int ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; thr = __kmp_threads[gtid]; MKLOC(loc, "GOMP_parallel_end"); KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); -#if OMPT_SUPPORT - ompt_parallel_id_t parallel_id; - ompt_task_id_t serialized_task_id; - ompt_frame_t *ompt_frame = NULL; - - if (ompt_enabled) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - parallel_id = team_info->parallel_id; - - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - serialized_task_id = task_info->task_id; - - // unlink if necessary. no-op if there is not a lightweight task. - ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr); - // GOMP allocates/frees lwt since it can't be kept on the stack - if (lwt) { - __kmp_free(lwt); - } - } -#endif - if (!thr->th.th_team->t.t_serialized) { __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, thr->th.th_team); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { // Implicit task is finished here, in the barrier we might schedule // deferred tasks, // these don't see the implicit task on the stack - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = NULL; + OMPT_CUR_TASK_INFO(thr)->frame.exit_runtime_frame = NULL; } #endif @@ -454,35 +485,7 @@ #endif ); } else { -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, serialized_task_id); - } -#endif - __kmpc_end_serialized_parallel(&loc, gtid); - -#if OMPT_SUPPORT - if (ompt_enabled) { - // Record that we re-entered the runtime system in the frame that - // created the parallel region. - ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0); - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, parent_task_info->task_id, - OMPT_INVOKER(fork_context_gnu)); - } - - parent_task_info->frame.reenter_runtime_frame = NULL; - - thr->th.ompt_thread_info.state = - (((thr->th.th_team)->t.t_serialized) ? ompt_state_work_serial - : ompt_state_work_parallel); - } -#endif } } @@ -510,6 +513,12 @@ // num and calculate the iteration space using the result. 
It doesn't do this // with ordered static loop, so they can be checked. +#if OMPT_SUPPORT +#define IF_OMPT_SUPPORT(code) code +#else +#define IF_OMPT_SUPPORT(code) +#endif + #define LOOP_START(func, schedule) \ int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \ long *p_ub) { \ @@ -522,9 +531,11 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -553,8 +564,10 @@ gtid, lb, ub, str, chunk_sz)); \ \ if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -579,6 +592,7 @@ MKLOC(loc, #func); \ KA_TRACE(20, (#func ": T#%d\n", gtid)); \ \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ (kmp_int *)p_ub, (kmp_int *)&stride); \ if (status) { \ @@ -623,7 +637,20 @@ int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) } @@ -798,17 +825,18 @@ KA_TRACE(20, (#func " exit: T#%d\n", gtid)); \ } -#if OMPT_SUPPORT +#if OMPT_SUPPORT && OMPT_OPTIONAL #define OMPT_LOOP_PRE() \ ompt_frame_t *parent_frame; \ - if (ompt_enabled) { \ - parent_frame = __ompt_get_task_frame_internal(0); \ - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); \ - } + if (ompt_enabled.enabled) { \ + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); \ + } \ + OMPT_STORE_RETURN_ADDRESS(gtid); #define OMPT_LOOP_POST() \ - if (ompt_enabled) { \ + if (ompt_enabled.enabled) { \ parent_frame->reenter_runtime_frame = NULL; \ } @@ -880,6 +908,16 @@ } } +#if OMPT_SUPPORT + kmp_taskdata_t *current_task; + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + current_task = __kmp_threads[gtid]->th.th_current_task; + current_task->ompt_task_info.frame.reenter_runtime_frame = + OMPT_GET_FRAME_ADDRESS(1); + } +#endif + if (if_cond) { #if OMP_40_ENABLED if (gomp_flags & 8) { @@ -895,23 +933,26 @@ dep_list[i].flags.out = (i < nout); } __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL); - } else + } else { #endif __kmpc_omp_task(&loc, gtid, task); + } } else { #if OMPT_SUPPORT ompt_thread_info_t oldInfo; kmp_info_t *thread; kmp_taskdata_t *taskdata; - if (ompt_enabled) { + kmp_taskdata_t *current_task; + if (ompt_enabled.enabled) { // Store the threads states and restore them after the task thread = 
__kmp_threads[gtid]; taskdata = KMP_TASK_TO_TASKDATA(task); oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; + thread->th.ompt_thread_info.state = omp_state_work_parallel; taskdata->ompt_task_info.frame.exit_runtime_frame = - __builtin_frame_address(0); + OMPT_GET_FRAME_ADDRESS(0); + OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -920,12 +961,17 @@ __kmpc_omp_task_complete_if0(&loc, gtid, task); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { thread->th.ompt_thread_info = oldInfo; taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; } #endif } +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + current_task->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); } @@ -934,6 +980,11 @@ MKLOC(loc, "GOMP_taskwait"); int gtid = __kmp_entry_gtid(); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); __kmpc_omp_taskwait(&loc, gtid); @@ -1003,10 +1054,11 @@ #if OMPT_SUPPORT ompt_frame_t *parent_frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); + parent_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif MKLOC(loc, "GOMP_parallel_sections_start"); @@ -1025,7 +1077,7 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { parent_frame->reenter_runtime_frame = NULL; } #endif @@ -1039,7 +1091,20 @@ int gtid = __kmp_get_gtid(); KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) +#if OMPT_SUPPORT + ompt_frame_t *ompt_frame; + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } +#endif KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) } @@ -1065,10 +1130,11 @@ #if OMPT_SUPPORT ompt_task_info_t *parent_task_info, *task_info; - if (ompt_enabled) { - parent_task_info = __ompt_get_taskinfo(0); - parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1); + if (ompt_enabled.enabled) { + parent_task_info = __ompt_get_task_info_object(0); + parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { if (num_threads != 0) { @@ -1084,15 +1150,20 @@ __kmp_GOMP_serialized_parallel(&loc, gtid, task); } #if OMPT_SUPPORT - if (ompt_enabled) { - task_info = __ompt_get_taskinfo(0); - task_info->frame.exit_runtime_frame = __builtin_frame_address(0); + if (ompt_enabled.enabled) { + task_info = __ompt_get_task_info_object(0); + task_info->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(0); } #endif task(data); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + OMPT_STORE_RETURN_ADDRESS(gtid); + } +#endif xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { task_info->frame.exit_runtime_frame = NULL; parent_task_info->frame.reenter_runtime_frame = NULL; } @@ -1108,6 +1179,10 @@ MKLOC(loc, 
"GOMP_parallel_sections"); KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { if (num_threads != 0) { __kmp_push_num_threads(&loc, gtid, num_threads); @@ -1155,6 +1230,8 @@ __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ } \ \ + IF_OMPT_SUPPORT(if (ompt_enabled.enabled) \ + OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ (schedule) != kmp_sch_static); \ @@ -1179,6 +1256,11 @@ MKLOC(loc, "GOMP_taskgroup_start"); KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_taskgroup(&loc, gtid); return; @@ -1189,6 +1271,11 @@ MKLOC(loc, "GOMP_taskgroup_end"); KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif + __kmpc_end_taskgroup(&loc, gtid); return; Index: runtime/src/kmp_i18n.h =================================================================== --- runtime/src/kmp_i18n.h +++ runtime/src/kmp_i18n.h @@ -2,7 +2,6 @@ * kmp_i18n.h */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_I18N_H #define KMP_I18N_H Index: runtime/src/kmp_i18n.cpp =================================================================== --- runtime/src/kmp_i18n.cpp +++ runtime/src/kmp_i18n.cpp @@ -2,7 +2,6 @@ * kmp_i18n.cpp */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp_i18n.h" #include "kmp.h" Index: runtime/src/kmp_import.cpp =================================================================== --- runtime/src/kmp_import.cpp +++ runtime/src/kmp_import.cpp @@ -2,7 +2,6 @@ * kmp_import.cpp */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - /* Object generated from this source file is linked to Windows* OS DLL import library (libompmd.lib) only! It is not a part of regular static or dynamic OpenMP RTL. Any code that just needs to go in the libompmd.lib (but not in Index: runtime/src/kmp_io.h =================================================================== --- runtime/src/kmp_io.h +++ runtime/src/kmp_io.h @@ -2,7 +2,6 @@ * kmp_io.h -- RTL IO header file. 
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_IO_H #define KMP_IO_H Index: runtime/src/kmp_io.cpp =================================================================== --- runtime/src/kmp_io.cpp +++ runtime/src/kmp_io.cpp @@ -2,7 +2,6 @@ * kmp_io.cpp -- RTL IO */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include #include #include Index: runtime/src/kmp_itt.h =================================================================== --- runtime/src/kmp_itt.h +++ runtime/src/kmp_itt.h @@ -3,7 +3,6 @@ * kmp_itt.h -- ITT Notify interface. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -13,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_ITT_H #define KMP_ITT_H Index: runtime/src/kmp_itt.cpp =================================================================== --- runtime/src/kmp_itt.cpp +++ runtime/src/kmp_itt.cpp @@ -5,7 +5,6 @@ * kmp_itt.cpp -- ITT Notify interface. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -15,7 +14,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp_itt.h" #if KMP_DEBUG Index: runtime/src/kmp_lock.h =================================================================== --- runtime/src/kmp_lock.h +++ runtime/src/kmp_lock.h @@ -2,7 +2,6 @@ * kmp_lock.h -- lock header file */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_LOCK_H #define KMP_LOCK_H @@ -1144,7 +1142,7 @@ // with/without consistency checking. extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *); -extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); @@ -1152,7 +1150,7 @@ // with/withuot consistency checking. 
extern void (*__kmp_indirect_init[])(kmp_user_lock_p); extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p); -extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); +extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); Index: runtime/src/kmp_lock.cpp =================================================================== --- runtime/src/kmp_lock.cpp +++ runtime/src/kmp_lock.cpp @@ -2,7 +2,6 @@ * kmp_lock.cpp -- lock-related functions */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include #include @@ -1137,7 +1135,7 @@ kmp_int32 need_mf = 1; #if OMPT_SUPPORT - ompt_state_t prev_state = ompt_state_undefined; + omp_state_t prev_state = omp_state_undefined; #endif KA_TRACE(1000, @@ -1245,7 +1243,7 @@ #endif #if OMPT_SUPPORT - if (ompt_enabled && prev_state != ompt_state_undefined) { + if (ompt_enabled.enabled && prev_state != omp_state_undefined) { /* change the state before clearing wait_id */ this_thr->th.ompt_thread_info.state = prev_state; this_thr->th.ompt_thread_info.wait_id = 0; @@ -1260,11 +1258,11 @@ } #if OMPT_SUPPORT - if (ompt_enabled && prev_state == ompt_state_undefined) { + if (ompt_enabled.enabled && prev_state == omp_state_undefined) { /* this thread will spin; set wait_id before entering wait state */ prev_state = this_thr->th.ompt_thread_info.state; this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; - this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; + this_thr->th.ompt_thread_info.state = omp_state_wait_lock; } #endif @@ -2913,11 +2911,11 @@ static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, kmp_dyna_lockseq_t tag); static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); -static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); +static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); -static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32); +static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32); static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, kmp_int32); static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, @@ -2940,14 +2938,13 @@ // set/acquire functions #define expand(l, op) \ - 0, (void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { + 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, +static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand #define expand(l, op) \ - 0, (void (*)(kmp_dyna_lock_t *, \ - kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { + 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, +static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand @@ -2970,7 +2967,7 @@ #undef expand // Exposes only one set of jump tables (*lock or *lock_with_checks). 
-void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; +int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; @@ -2984,13 +2981,13 @@ // set/acquire functions #define expand(l, op) \ - (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, -static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { - KMP_FOREACH_I_LOCK(expand, acquire)}; + (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, +static int (*indirect_set[])(kmp_user_lock_p, + kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand #define expand(l, op) \ - (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, -static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { + (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, +static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand @@ -3011,7 +3008,7 @@ #undef expand // Exposes only one jump tables (*lock or *lock_with_checks). -void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; +int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; @@ -3166,9 +3163,9 @@ __kmp_release_lock(&__kmp_global_lock, gtid); } -static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { +static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); + return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { @@ -3181,11 +3178,11 @@ return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); } -static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, - kmp_int32 gtid) { +static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32 gtid) { kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); + return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, Index: runtime/src/kmp_omp.h =================================================================== --- runtime/src/kmp_omp.h +++ runtime/src/kmp_omp.h @@ -4,7 +4,6 @@ * This is for information about runtime library structures. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -14,7 +13,6 @@ // //===----------------------------------------------------------------------===// - /* THIS FILE SHOULD NOT BE MODIFIED IN IDB INTERFACE LIBRARY CODE It should instead be modified in the OpenMP runtime and copied to the interface library code. This way we can minimize the problems that this is Index: runtime/src/kmp_os.h =================================================================== --- runtime/src/kmp_os.h +++ runtime/src/kmp_os.h @@ -2,7 +2,6 @@ * kmp_os.h -- KPTS runtime header file. 
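The signature change that runs through kmp_lock.h and kmp_lock.cpp above, void to int for the direct and indirect set/acquire jump tables, lets callers learn whether an acquire took ownership for the first time or merely bumped the nesting count of a nested lock already held by this thread; that distinction is what lock-related tool callbacks need. Below is a sketch of a caller consuming that status. The DEMO_* constants mirror the usual kmp_lock.h acquisition-status values as an assumption, and the callback wording is illustrative, not copied from the patch.

#include <cstdio>

// Assumed mirrors of the kmp_lock.h status values: FIRST = new ownership,
// NEXT = re-acquisition of a nested lock already held by this thread.
enum { DEMO_LOCK_ACQUIRED_FIRST = 1, DEMO_LOCK_ACQUIRED_NEXT = 2 };

static int demo_nest_depth = 0;

// Stand-in for an entry in the re-typed jump table:
//   int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
static int demo_acquire_nested_lock(int /*gtid*/) {
  return (demo_nest_depth++ == 0) ? DEMO_LOCK_ACQUIRED_FIRST
                                  : DEMO_LOCK_ACQUIRED_NEXT;
}

int main() {
  for (int i = 0; i < 2; ++i) {
    int status = demo_acquire_nested_lock(/*gtid=*/0);
    if (status == DEMO_LOCK_ACQUIRED_FIRST)
      std::printf("first acquisition: report mutex-acquired to the tool\n");
    else
      std::printf("nested re-acquisition: report the nest-lock event instead\n");
  }
  return 0;
}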
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_OS_H #define KMP_OS_H @@ -247,9 +245,9 @@ #include static inline int KMP_GET_PAGE_SIZE(void) { - SYSTEM_INFO si; - GetSystemInfo(&si); - return si.dwPageSize; + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; } #else #define KMP_GET_PAGE_SIZE() getpagesize() @@ -829,7 +827,6 @@ #define KMP_USE_BGET 1 #endif - // Switches for OSS builds #ifndef USE_SYSFS_INFO #define USE_SYSFS_INFO 0 Index: runtime/src/kmp_platform.h =================================================================== --- runtime/src/kmp_platform.h +++ runtime/src/kmp_platform.h @@ -2,7 +2,6 @@ * kmp_platform.h -- header for determining operating system and architecture */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_PLATFORM_H #define KMP_PLATFORM_H Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -2,7 +2,6 @@ * kmp_runtime.cpp -- KPTS runtime support library */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" #include "kmp_atomic.h" @@ -724,16 +722,6 @@ /* TODO replace with general release procedure */ team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc); -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - /* accept blame for "ordered" waiting */ - kmp_info_t *this_thread = __kmp_threads[gtid]; - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - this_thread->th.ompt_thread_info.wait_id); - } -#endif - KMP_MB(); /* Flush all pending memory write invalidates. */ } #endif /* BUILD_PARALLEL_ORDERED */ @@ -1107,15 +1095,16 @@ __kmp_store_mxcsr(&mxcsr); mxcsr &= KMP_X86_MXCSR_MASK; -// There is no point looking at t_fp_control_saved here. -// If it is TRUE, we still have to update the values if they are different from -// those we now have. -// If it is FALSE we didn't save anything yet, but our objective is the same. We -// have to ensure that the values in the team are the same as those we have. -// So, this code achieves what we need whether or not t_fp_control_saved is -// true. By checking whether the value needs updating we avoid unnecessary -// writes that would put the cache-line into a written state, causing all -// threads in the team to have to read it again. + // There is no point looking at t_fp_control_saved here. + // If it is TRUE, we still have to update the values if they are different + // from those we now have. + // If it is FALSE we didn't save anything yet, but our objective is the + // same. We have to ensure that the values in the team are the same as + // those we have. + // So, this code achieves what we need whether or not t_fp_control_saved is + // true. By checking whether the value needs updating we avoid unnecessary + // writes that would put the cache-line into a written state, causing all + // threads in the team to have to read it again. 
KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word); KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr); // Although we don't use this value, other code in the runtime wants to know @@ -1206,6 +1195,29 @@ this_thr->th.th_set_proc_bind = proc_bind_default; #endif /* OMP_40_ENABLED */ +#if OMPT_SUPPORT + ompt_data_t ompt_parallel_data; + ompt_parallel_data.ptr = NULL; + ompt_data_t *implicit_task_data; + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); + if (ompt_enabled.enabled && + this_thr->th.ompt_thread_info.state != omp_state_overhead) { + + ompt_task_info_t *parent_task_info; + parent_task_info = OMPT_CUR_TASK_INFO(this_thr); + + parent_task_info->frame.reenter_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_enabled.ompt_callback_parallel_begin) { + int team_size = 1; + + ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( + &(parent_task_info->task_data), &(parent_task_info->frame), + &ompt_parallel_data, team_size, + ompt_invoker_program, codeptr); + } + } +#endif // OMPT_SUPPORT + if (this_thr->th.th_team != serial_team) { // Nested level will be an index in the nested nthreads array int level = this_thr->th.th_team->t.t_level; @@ -1217,13 +1229,9 @@ __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); -#endif - new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_data, #endif #if OMP_40_ENABLED proc_bind, @@ -1318,11 +1326,6 @@ } this_thr->th.th_dispatch = serial_team->t.t_dispatch; -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); - __ompt_team_assign_id(serial_team, ompt_parallel_id); -#endif - KMP_MB(); } else { @@ -1366,17 +1369,41 @@ if (__kmp_env_consistency_check) __kmp_push_parallel(global_tid, NULL); +#if OMPT_SUPPORT + serial_team->t.ompt_team_info.master_return_address = codeptr; + if (ompt_enabled.enabled && + this_thr->th.ompt_thread_info.state != omp_state_overhead) { + OMPT_CUR_TASK_INFO(this_thr) + ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + + ompt_lw_taskteam_t lw_taskteam; + __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, + &ompt_parallel_data, codeptr); + + __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1); + // don't use lw_taskteam after linking. content was swaped + + /* OMPT implicit task begin */ + implicit_task_data = OMPT_CUR_TASK_DATA(this_thr); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), + OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid)); + } + + /* OMPT state */ + this_thr->th.ompt_thread_info.state = omp_state_work_parallel; + OMPT_CUR_TASK_INFO(this_thr) + ->frame.exit_runtime_frame = OMPT_GET_FRAME_ADDRESS(1); + } +#endif } /* most of the work for a fork */ /* return true if we really went parallel, false if serialized */ int __kmp_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context, // Intel, GNU, ... 
- kmp_int32 argc, -#if OMPT_SUPPORT - void *unwrapped_task, -#endif - microtask_t microtask, launch_t invoker, + kmp_int32 argc, microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX va_list *ap @@ -1434,16 +1461,17 @@ master_set_numthreads = master_th->th.th_set_nproc; #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id; - ompt_task_id_t ompt_task_id; + ompt_data_t ompt_parallel_data; + ompt_parallel_data.ptr = NULL; + ompt_data_t *parent_task_data; ompt_frame_t *ompt_frame; - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; + ompt_data_t *implicit_task_data; + void *return_address = NULL; - if (ompt_enabled) { - ompt_parallel_id = __ompt_parallel_id_new(gtid); - ompt_task_id = __ompt_get_task_id_internal(0); - ompt_frame = __ompt_get_task_frame_internal(0); + if (ompt_enabled.enabled) { + __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame, + NULL, NULL); + return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); } #endif @@ -1467,13 +1495,16 @@ #endif #if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = master_set_numthreads; - - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task, - OMPT_INVOKER(call_context)); + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_parallel_begin) { + int team_size = master_set_numthreads + ? master_set_numthreads + : get__nproc_2(parent_team, master_tid); + ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( + parent_task_data, ompt_frame, &ompt_parallel_data, team_size, + OMPT_INVOKER(call_context), return_address); + } + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -1510,27 +1541,25 @@ ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task, - ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + if (ompt_enabled.enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + &ompt_parallel_data, return_address); exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - __ompt_lw_taskteam_link(&lw_taskteam, master_th); + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); + // don't use lw_taskteam after linking. 
content was swaped -#if OMPT_TRACE /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = parent_team->t.ompt_team_info.parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); + implicit_task_data = OMPT_CUR_TASK_DATA(master_th); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); } -#endif /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1549,34 +1578,27 @@ #if OMPT_SUPPORT *exit_runtime_p = NULL; - if (ompt_enabled) { -#if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); + if (ompt_enabled.enabled) { + OMPT_CUR_TASK_INFO(master_th)->frame.exit_runtime_frame = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, implicit_task_data, 1, + __kmp_tid_from_gtid(gtid)); } - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; -#endif - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th), + OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = ompt_state_overhead; + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif return TRUE; } parent_team->t.t_pkfn = microtask; -#if OMPT_SUPPORT - parent_team->t.ompt_team_info.microtask = unwrapped_task; -#endif parent_team->t.t_invoke = invoker; KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel); parent_team->t.t_active_level++; @@ -1728,28 +1750,27 @@ #if OMPT_SUPPORT void *dummy; void **exit_runtime_p; + ompt_task_info_t *task_info; ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { + if (ompt_enabled.enabled) { __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = - &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - - __ompt_lw_taskteam_link(&lw_taskteam, master_th); - -#if OMPT_TRACE - my_task_id = lw_taskteam.ompt_task_info.task_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_task_id); + &ompt_parallel_data, return_address); + + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); + // don't use lw_taskteam after linking. 
content was swaped + + task_info = OMPT_CUR_TASK_INFO(master_th); + exit_runtime_p = &(task_info->frame.exit_runtime_frame); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid)); } -#endif /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1768,26 +1789,21 @@ } #if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled) { - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - -#if OMPT_TRACE - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); + if (ompt_enabled.enabled) { + exit_runtime_p = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), 1, + __kmp_tid_from_gtid(gtid)); } -#endif __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + OMPT_CUR_TEAM_DATA(master_th), parent_task_data, + OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = ompt_state_overhead; + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif } else if (microtask == (microtask_t)__kmp_teams_master) { @@ -1836,30 +1852,28 @@ #if OMPT_SUPPORT void *dummy; void **exit_runtime_p; + ompt_task_info_t *task_info; ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { + if (ompt_enabled.enabled) { __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = - &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); + &ompt_parallel_data, return_address); + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); + // don't use lw_taskteam after linking. 
content was swaped + task_info = OMPT_CUR_TASK_INFO(master_th); + exit_runtime_p = &(task_info->frame.exit_runtime_frame); - __ompt_lw_taskteam_link(&lw_taskteam, master_th); - -#if OMPT_TRACE /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = ompt_parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); + implicit_task_data = OMPT_CUR_TASK_DATA(master_th); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); } -#endif /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1877,26 +1891,22 @@ } #if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled) { -#if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, my_task_id); + if (ompt_enabled.enabled) { + *exit_runtime_p = NULL; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), 1, + __kmp_tid_from_gtid(gtid)); } -#endif + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + &ompt_parallel_data, parent_task_data, + OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = ompt_state_overhead; + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif #if OMP_40_ENABLED @@ -1904,14 +1914,13 @@ #endif /* OMP_40_ENABLED */ } else if (call_context == fork_context_gnu) { #if OMPT_SUPPORT - ompt_lw_taskteam_t *lwt = - (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task, - ompt_parallel_id); + ompt_lw_taskteam_t lwt; + __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, + return_address); - lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); - lwt->ompt_task_info.frame.exit_runtime_frame = NULL; - __ompt_lw_taskteam_link(lwt, master_th); + lwt.ompt_task_info.frame.exit_runtime_frame = NULL; + __ompt_lw_taskteam_link(&lwt, master_th, 1); +// don't use lw_taskteam after linking. 
content was swaped #endif // we were called from GNU native code @@ -2006,7 +2015,7 @@ KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); team = __kmp_allocate_team(root, nthreads, nthreads, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_data, #endif #if OMP_40_ENABLED proc_bind, @@ -2017,7 +2026,7 @@ KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); team = __kmp_allocate_team(root, nthreads, nthreads, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_data, #endif #if OMP_40_ENABLED proc_bind, @@ -2035,7 +2044,8 @@ KMP_CHECK_UPDATE(team->t.t_parent, parent_team); KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); #if OMPT_SUPPORT - KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); + KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address, + return_address); #endif KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe // TODO: parent_team->t.t_level == INT_MAX ??? @@ -2169,7 +2179,7 @@ &master_th->th.th_current_task->td_icvs, loc); #if OMPT_SUPPORT - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + master_th->th.ompt_thread_info.state = omp_state_work_parallel; #endif __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); @@ -2253,8 +2263,8 @@ KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); #if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; + if (ompt_enabled.enabled) { + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -2266,17 +2276,18 @@ kmp_team_t *team) { // restore state outside the region thread->th.ompt_thread_info.state = - ((team->t.t_serialized) ? ompt_state_work_serial - : ompt_state_work_parallel); + ((team->t.t_serialized) ? omp_state_work_serial + : omp_state_work_parallel); } -static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team, - ompt_parallel_id_t parallel_id, - fork_context_e fork_context) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); +static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, + kmp_team_t *team, ompt_data_t *parallel_data, + fork_context_e fork_context, void *codeptr) { + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context), + codeptr); } task_info->frame.reenter_runtime_frame = NULL; @@ -2313,8 +2324,8 @@ master_th->th.th_ident = loc; #if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; + if (ompt_enabled.enabled) { + master_th->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -2351,7 +2362,7 @@ __kmpc_end_serialized_parallel(loc, gtid); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled.enabled) { __kmp_join_restore_state(master_th, parent_team); } #endif @@ -2379,7 +2390,8 @@ KMP_MB(); #if OMPT_SUPPORT - ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; + ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data); + void *codeptr = team->t.ompt_team_info.master_return_address; #endif #if USE_ITT_BUILD @@ -2451,8 +2463,9 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + if (ompt_enabled.enabled) { + 
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + codeptr); } #endif @@ -2481,15 +2494,18 @@ } KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, task_info->task_id); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_implicit_task) { + int ompt_team_size = team->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, + __kmp_tid_from_gtid(gtid)); } + task_info->frame.exit_runtime_frame = NULL; - task_info->task_id = 0; + task_info->task_data = ompt_data_none; } #endif @@ -2560,8 +2576,9 @@ __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); #if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + if (ompt_enabled.enabled) { + __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + codeptr); } #endif @@ -3156,7 +3173,7 @@ 1, // new_nproc 1, // max_nproc #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED __kmp_nested_proc_bind.bind_types[0], @@ -3197,7 +3214,7 @@ 1, // new_nproc __kmp_dflt_team_nth_ub * 2, // max_nproc #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED __kmp_nested_proc_bind.bind_types[0], @@ -3736,6 +3753,9 @@ __kmp_print_thread_storage_map(root_thread, gtid); } root_thread->th.th_info.ds.ds_gtid = gtid; +#if OMPT_SUPPORT + root_thread->th.ompt_thread_info.thread_data.ptr = NULL; +#endif root_thread->th.th_root = root; if (__kmp_env_consistency_check) { root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); @@ -3758,7 +3778,7 @@ root_thread->th.th_serial_team = __kmp_allocate_team(root, 1, 1, #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED proc_bind_default, @@ -3828,6 +3848,29 @@ __kmp_root_counter++; +#if OMPT_SUPPORT + if (!initial_thread && ompt_enabled.enabled) { + + ompt_thread_t *root_thread = ompt_get_thread(); + + ompt_set_thread_state(root_thread, omp_state_overhead); + + if (ompt_enabled.ompt_callback_thread_begin) { + ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( + ompt_thread_initial, __ompt_get_thread_data_internal()); + } + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); + if (ompt_enabled.ompt_callback_task_create) { + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + NULL, NULL, task_data, ompt_task_initial, 0, NULL); + // initial task has nothing to return to + } + + ompt_set_thread_state(root_thread, omp_state_work_serial); + } +#endif + KMP_MB(); __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); @@ -3911,9 +3954,9 @@ #endif /* KMP_OS_WINDOWS */ #if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - int gtid = __kmp_get_gtid(); - __ompt_thread_end(ompt_thread_initial, gtid); + if (ompt_enabled.ompt_callback_thread_end) { + ompt_callbacks.ompt_callback(ompt_callback_thread_end)( + &(root->r.r_uber_thread->th.ompt_thread_info.thread_data)); } #endif @@ -3963,7 +4006,7 @@ if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { #if OMPT_SUPPORT // the 
runtime is shutting down so we won't report any events - thread->th.ompt_thread_info.state = ompt_state_undefined; + thread->th.ompt_thread_info.state = omp_state_undefined; #endif __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); } @@ -4284,7 +4327,7 @@ new_thr->th.th_serial_team = serial_team = (kmp_team_t *)__kmp_allocate_team(root, 1, 1, #if OMPT_SUPPORT - 0, // root parallel id + ompt_data_none, // root parallel id #endif #if OMP_40_ENABLED proc_bind_default, @@ -4676,12 +4719,12 @@ place++; } - KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " - "partition = [%d,%d], __kmp_affinity_num_masks: %u\n", - __kmp_gtid_from_thread(team->t.t_threads[f]), - team->t.t_id, f, th->th.th_new_place, - th->th.th_first_place, th->th.th_last_place, - __kmp_affinity_num_masks)); + KA_TRACE(100, + ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " + "partition = [%d,%d], __kmp_affinity_num_masks: %u\n", + __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, + f, th->th.th_new_place, th->th.th_first_place, + th->th.th_last_place, __kmp_affinity_num_masks)); } } else { /* Having uniform space of available computation places I can create @@ -4689,7 +4732,7 @@ place of each partition. */ double current = static_cast(masters_place); double spacing = - (static_cast(n_places + 1) / static_cast(n_th)); + (static_cast(n_places + 1) / static_cast(n_th)); int first, last; kmp_info_t *th; @@ -4735,12 +4778,12 @@ th->th.th_new_place = place; th->th.th_last_place = last; - KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " - "partition = [%d,%d], spacing = %.4f\n", - __kmp_gtid_from_thread(team->t.t_threads[f]), - team->t.t_id, f, th->th.th_new_place, - th->th.th_first_place, th->th.th_last_place, - spacing)); + KA_TRACE(100, + ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " + "partition = [%d,%d], spacing = %.4f\n", + __kmp_gtid_from_thread(team->t.t_threads[f]), + team->t.t_id, f, th->th.th_new_place, + th->th.th_first_place, th->th.th_last_place, spacing)); } } } @@ -4815,7 +4858,7 @@ kmp_team_t * __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_data_t ompt_parallel_data, #endif #if OMP_40_ENABLED kmp_proc_bind_t new_proc_bind, @@ -5182,7 +5225,7 @@ #endif #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_data); #endif KMP_MB(); @@ -5234,7 +5277,7 @@ team->t.t_id)); #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_data); #endif KMP_MB(); @@ -5242,9 +5285,10 @@ return team; } -/* reap team if it is too small, then loop back and check the next one */ -// not sure if this is wise, but, will be redone during the hot-teams rewrite. -/* TODO: Use technique to find the right size hot-team, don't reap them */ + /* reap team if it is too small, then loop back and check the next one */ + // not sure if this is wise, but, will be redone during the hot-teams + // rewrite. 
+ /* TODO: Use technique to find the right size hot-team, don't reap them */ team = __kmp_reap_team(team); __kmp_team_pool = team; } @@ -5297,7 +5341,7 @@ #endif #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_data); team->t.ompt_serialized_team_info = NULL; #endif @@ -5564,16 +5608,26 @@ } #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; + ompt_data_t *thread_data; + if (ompt_enabled.enabled) { + thread_data = &(this_thr->th.ompt_thread_info.thread_data); + thread_data->ptr = NULL; + + this_thr->th.ompt_thread_info.state = omp_state_overhead; this_thr->th.ompt_thread_info.wait_id = 0; - this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - __ompt_thread_begin(ompt_thread_worker, gtid); + this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0); + if (ompt_enabled.ompt_callback_thread_begin) { + ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( + ompt_thread_worker, thread_data); } } #endif +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + this_thr->th.ompt_thread_info.state = omp_state_idle; + } +#endif /* This is the place where threads wait for work */ while (!TCR_4(__kmp_global.g.g_done)) { KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); @@ -5582,18 +5636,12 @@ /* wait for work to do */ KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid)); -#if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_idle; - } -#endif - /* No tid yet since not part of a team */ __kmp_fork_barrier(gtid, KMP_GTID_DNE); #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; + if (ompt_enabled.enabled) { + this_thr->th.ompt_thread_info.state = omp_state_overhead; } #endif @@ -5601,14 +5649,6 @@ /* have we been allocated? */ if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) { -#if OMPT_SUPPORT - ompt_task_info_t *task_info; - ompt_parallel_id_t my_parallel_id; - if (ompt_enabled) { - task_info = __ompt_get_taskinfo(0); - my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; - } -#endif /* we were just woken up, so run our new task */ if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) { int rc; @@ -5620,11 +5660,8 @@ updateHWFPControl(*pteam); #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - // Initialize OMPT task id for implicit task. 
- int tid = __kmp_tid_from_gtid(gtid); - task_info->task_id = __ompt_task_id_new(tid); + if (ompt_enabled.enabled) { + this_thr->th.ompt_thread_info.state = omp_state_work_parallel; } #endif @@ -5635,40 +5672,29 @@ } KMP_ASSERT(rc); -#if OMPT_SUPPORT - if (ompt_enabled) { - /* no frame set while outside task */ - task_info->frame.exit_runtime_frame = NULL; - - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif KMP_MB(); KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); } - /* join barrier after parallel region */ - __kmp_join_barrier(gtid); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - // don't access *pteam here: it may have already been freed - // by the master thread behind the barrier (possible race) - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, task_info->task_id); - } - task_info->frame.exit_runtime_frame = NULL; - task_info->task_id = 0; +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + /* no frame set while outside task */ + __ompt_get_task_info_object(0)->frame.exit_runtime_frame = NULL; + + this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); } #endif + /* join barrier after parallel region */ + __kmp_join_barrier(gtid); } } TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); #if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - __ompt_thread_end(ompt_thread_worker, gtid); + if (ompt_enabled.ompt_callback_thread_end) { + ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data); } #endif @@ -5902,10 +5928,10 @@ // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? KMP_MB(); /* Flush all pending memory write invalidates. */ -// Need to check that monitor was initialized before reaping it. If we are -// called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then -// __kmp_monitor will appear to contain valid data, but it is only valid in the -// parent process, not the child. + // Need to check that monitor was initialized before reaping it. If we are + // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then + // __kmp_monitor will appear to contain valid data, but it is only valid in + // the parent process, not the child. // New behavior (201008): instead of keying off of the flag // __kmp_init_parallel, the monitor thread creation is keyed off // of the new flag __kmp_init_monitor. 
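
For orientation between the two kmp_runtime.cpp hunks above and below: these changes move the fork/join and thread-launch paths from the old boolean ompt_enabled and ompt_event_* callbacks to the ompt_enabled.<callback> bit-field, ompt_callback_* dispatch, and tool-owned ompt_data_t for parallel, task, and thread objects. The sketch below shows the other side of that contract: a first-party tool that defines ompt_start_tool and registers the thread-begin and implicit-task callbacks this file now invokes. The callback argument lists are transcribed from the calls in the hunks above; the header name, the ompt_thread_type_t spelling, the initializer prototype, and the exact layout of ompt_start_tool_result_t are assumptions based on the 5.0-preview interface this patch targets (they changed again before the final OpenMP 5.0 omp-tools.h), so treat this as an illustration rather than the definitive API.

/* tool.c -- minimal OMPT tool sketch against the assumed 5.0-preview header. */
#include <stdio.h>
#include <ompt.h> /* assumption: preview builds install the tool header as ompt.h */

static void on_thread_begin(ompt_thread_type_t type, ompt_data_t *thread_data) {
  /* thread_data is the per-thread ompt_data_t set up in __kmp_launch_thread;
     its contents belong to the tool. */
  thread_data->value = 1;
  printf("[tool] thread begin (type=%d)\n", (int)type);
}

static void on_implicit_task(ompt_scope_endpoint_t endpoint,
                             ompt_data_t *parallel_data, ompt_data_t *task_data,
                             unsigned int team_size, unsigned int thread_num) {
  /* Mirrors the five arguments the runtime passes at implicit-task begin/end
     in __kmp_fork_call and at the join barrier. */
  (void)parallel_data; (void)task_data;
  printf("[tool] implicit task %s: team_size=%u tid=%u\n",
         endpoint == ompt_scope_begin ? "begin" : "end", team_size, thread_num);
}

static int tool_initialize(ompt_function_lookup_t lookup, ompt_data_t *tool_data) {
  /* assumption: preview-era initializer signature */
  ompt_set_callback_t set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
  set_callback(ompt_callback_thread_begin, (ompt_callback_t)on_thread_begin);
  set_callback(ompt_callback_implicit_task, (ompt_callback_t)on_implicit_task);
  (void)tool_data;
  return 1; /* non-zero keeps the tool active */
}

static void tool_finalize(ompt_data_t *tool_data) { (void)tool_data; }

/* The runtime looks this symbol up at startup; a non-NULL return is what makes
   ompt_enabled.enabled true in the code above. */
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {&tool_initialize, &tool_finalize};
  (void)omp_version; (void)runtime_version;
  return &result;
}

Such a tool can be activated by linking it into the application directly or, once the kmp_settings.cpp change further down in this patch lands, by pointing the new OMP_TOOL_LIBRARIES environment variable at the shared object.
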
@@ -6926,26 +6952,27 @@ #if OMPT_SUPPORT void *dummy; void **exit_runtime_p; - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + int ompt_team_size; - if (ompt_enabled) { + if (ompt_enabled.enabled) { exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid] .ompt_task_info.frame.exit_runtime_frame); } else { exit_runtime_p = &dummy; } -#if OMPT_TRACE - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id, - my_task_id); + my_task_data = + &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data); + my_parallel_data = &(team->t.ompt_team_info.parallel_data); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_team_size = team->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, + __kmp_tid_from_gtid(gtid)); } #endif -#endif { KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); @@ -6992,9 +7019,6 @@ SSC_MARK_FORKING(); #endif __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc, -#if OMPT_SUPPORT - (void *)thr->th.th_teams_microtask, // "unwrapped" task -#endif (microtask_t)thr->th.th_teams_microtask, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL); #if INCLUDE_SSC_MARKS @@ -7171,6 +7195,36 @@ #endif /* KMP_DEBUG */ __kmp_join_barrier(gtid); /* wait for everyone */ +#if OMPT_SUPPORT + int ds_tid = this_thr->th.th_info.ds.ds_tid; + if (this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) { + ompt_data_t *tId = OMPT_CUR_TASK_DATA(this_thr); + ompt_data_t *pId = OMPT_CUR_TEAM_DATA(this_thr); + this_thr->th.ompt_thread_info.state = omp_state_overhead; +#if OMPT_OPTIONAL + void *codeptr = NULL; + if (KMP_MASTER_TID(ds_tid) && + (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) || + ompt_callbacks.ompt_callback(ompt_callback_sync_region))) + codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address; + + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, pId, tId, codeptr); + } +#endif + if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, tId, 0, ds_tid); + } + // return to idle state + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } +#endif KMP_MB(); /* Flush all pending memory write invalidates. 
*/ KMP_ASSERT(this_thr->th.th_team == team); Index: runtime/src/kmp_safe_c_api.h =================================================================== --- runtime/src/kmp_safe_c_api.h +++ runtime/src/kmp_safe_c_api.h @@ -8,7 +8,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_SAFE_C_API_H #define KMP_SAFE_C_API_H Index: runtime/src/kmp_sched.cpp =================================================================== --- runtime/src/kmp_sched.cpp +++ runtime/src/kmp_sched.cpp @@ -2,7 +2,6 @@ * kmp_sched.cpp -- static scheduling -- iteration initialization */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - /* Static scheduling initialization. NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however @@ -46,7 +44,12 @@ T *plower, T *pupper, typename traits_t::signed_t *pstride, typename traits_t::signed_t incr, - typename traits_t::signed_t chunk) { + typename traits_t::signed_t chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + void *codeptr +#endif + ) { KMP_COUNT_BLOCK(OMP_FOR_static); KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling); @@ -60,14 +63,29 @@ kmp_team_t *team; kmp_info_t *th = __kmp_threads[gtid]; -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT && OMPT_OPTIONAL ompt_team_info_t *team_info = NULL; ompt_task_info_t *task_info = NULL; + ompt_work_type_t ompt_work_type; - if (ompt_enabled) { + if (ompt_enabled.enabled) { // Only fully initialize variables needed by OMPT if OMPT is enabled. team_info = __ompt_get_teaminfo(0, NULL); - task_info = __ompt_get_taskinfo(0); + task_info = __ompt_get_task_info_object(0); + // Determine workshare type + if (loc != NULL) { + if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { + ompt_work_type = ompt_work_loop; + } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { + ompt_work_type = ompt_work_sections; + } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { + ompt_work_type = ompt_work_distribute; + } else { + KMP_ASSERT2(0, + "__kmpc_for_static_init: can't determine workshare type"); + } + KMP_DEBUG_ASSERT(ompt_work_type); + } } #endif @@ -121,10 +139,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), 0, codeptr); } #endif KMP_COUNT_VALUE(FOR_static_iterations, 0); @@ -172,10 +191,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), *pstride, codeptr); } #endif return; @@ -200,10 +220,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", 
global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), *pstride, codeptr); } #endif return; @@ -356,10 +377,11 @@ #endif KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), trip_count, codeptr); } #endif @@ -747,7 +769,12 @@ kmp_int32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! @@ -759,7 +786,12 @@ kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! @@ -770,7 +802,12 @@ kmp_int64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! @@ -782,7 +819,12 @@ kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk) { __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, - pupper, pstride, incr, chunk); + pupper, pstride, incr, chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + OMPT_GET_RETURN_ADDRESS(0) +#endif + ); } /*! 
@} Index: runtime/src/kmp_settings.h =================================================================== --- runtime/src/kmp_settings.h +++ runtime/src/kmp_settings.h @@ -2,7 +2,6 @@ * kmp_settings.h -- Initialize environment variables */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_SETTINGS_H #define KMP_SETTINGS_H Index: runtime/src/kmp_settings.cpp =================================================================== --- runtime/src/kmp_settings.cpp +++ runtime/src/kmp_settings.cpp @@ -2,7 +2,6 @@ * kmp_settings.cpp -- Initialize environment variables */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" #include "kmp_atomic.h" @@ -24,7 +22,7 @@ #include "kmp_settings.h" #include "kmp_str.h" #include "kmp_wrapper_getpid.h" -#include // toupper() +#include // toupper() static int __kmp_env_toPrint(char const *name, int flag); @@ -336,13 +334,11 @@ } } // __kmp_stg_parse_size -#if KMP_AFFINITY_SUPPORTED static void __kmp_stg_parse_str(char const *name, char const *value, char const **out) { __kmp_str_free(out); *out = __kmp_str_format("%s", value); } // __kmp_stg_parse_str -#endif static void __kmp_stg_parse_int( char const @@ -4356,7 +4352,29 @@ #endif -// ----------------------------------------------------------------------------- +#if OMP_50_ENABLED && OMPT_SUPPORT + +static void __kmp_stg_parse_omp_tool_libraries(char const *name, + char const *value, void *data) { + __kmp_stg_parse_str(name, value, &__kmp_tool_libraries); +} // __kmp_stg_parse_omp_tool_libraries + +static void __kmp_stg_print_omp_tool_libraries(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_tool_libraries) + __kmp_stg_print_str(buffer, name, __kmp_tool_libraries); + else { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); + } +} // __kmp_stg_print_omp_tool_libraries + +#endif + // Table. static kmp_setting_t __kmp_stg_table[] = { @@ -4600,6 +4618,12 @@ {"OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0}, #endif + +#if OMP_50_ENABLED && OMPT_SUPPORT + {"OMP_TOOL_LIBRARIES", __kmp_stg_parse_omp_tool_libraries, + __kmp_stg_print_omp_tool_libraries, NULL, 0, 0}, +#endif + {"", NULL, NULL, NULL, 0, 0}}; // settings static int const __kmp_stg_count = @@ -4928,7 +4952,6 @@ if (value) { ompc_set_dynamic(__kmp_global.g.g_dynamic); } - } void __kmp_env_initialize(char const *string) { @@ -4960,7 +4983,7 @@ } } -// We need to know if blocktime was set when processing OMP_WAIT_POLICY + // We need to know if blocktime was set when processing OMP_WAIT_POLICY blocktime_str = __kmp_env_blk_var(&block, "KMP_BLOCKTIME"); // Special case. If we parse environment, not a string, process KMP_WARNINGS Index: runtime/src/kmp_stats.h =================================================================== --- runtime/src/kmp_stats.h +++ runtime/src/kmp_stats.h @@ -5,7 +5,6 @@ * Functions for collecting statistics. 
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -15,7 +14,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp_config.h" #if KMP_STATS_ENABLED Index: runtime/src/kmp_stats.cpp =================================================================== --- runtime/src/kmp_stats.cpp +++ runtime/src/kmp_stats.cpp @@ -2,7 +2,6 @@ * Statistics gathering and processing. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_lock.h" #include "kmp_stats.h" Index: runtime/src/kmp_stats_timing.h =================================================================== --- runtime/src/kmp_stats_timing.h +++ runtime/src/kmp_stats_timing.h @@ -5,7 +5,6 @@ * Access to real time clock and timers. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -15,7 +14,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp_os.h" #include #include @@ -59,11 +57,11 @@ tsc_tick_count() : my_count(static_cast(__builtin_readcyclecounter())) {} #elif KMP_HAVE___RDTSC - tsc_tick_count() : my_count(static_cast(__rdtsc())){} + tsc_tick_count() : my_count(static_cast(__rdtsc())) {} #else #error Must have high resolution timer defined #endif - tsc_tick_count(int64_t value) : my_count(value){} + tsc_tick_count(int64_t value) : my_count(value) {} int64_t getValue() const { return my_count; } tsc_tick_count later(tsc_tick_count const other) const { return my_count > other.my_count ? (*this) : other; Index: runtime/src/kmp_stats_timing.cpp =================================================================== --- runtime/src/kmp_stats_timing.cpp +++ runtime/src/kmp_stats_timing.cpp @@ -2,7 +2,6 @@ * Timing functions */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include #include Index: runtime/src/kmp_str.h =================================================================== --- runtime/src/kmp_str.h +++ runtime/src/kmp_str.h @@ -2,7 +2,6 @@ * kmp_str.h -- String manipulation routines. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_STR_H #define KMP_STR_H Index: runtime/src/kmp_str.cpp =================================================================== --- runtime/src/kmp_str.cpp +++ runtime/src/kmp_str.cpp @@ -2,7 +2,6 @@ * kmp_str.cpp -- String manipulation routines. 
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp_str.h" #include // va_* Index: runtime/src/kmp_stub.h =================================================================== --- runtime/src/kmp_stub.h +++ runtime/src/kmp_stub.h @@ -2,7 +2,6 @@ * kmp_stub.h */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_STUB_H #define KMP_STUB_H Index: runtime/src/kmp_stub.cpp =================================================================== --- runtime/src/kmp_stub.cpp +++ runtime/src/kmp_stub.cpp @@ -2,7 +2,6 @@ * kmp_stub.cpp -- stub versions of user-callable OpenMP RT functions. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include #include #include Index: runtime/src/kmp_taskdeps.cpp =================================================================== --- runtime/src/kmp_taskdeps.cpp +++ runtime/src/kmp_taskdeps.cpp @@ -2,7 +2,6 @@ * kmp_taskdeps.cpp */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,12 +11,14 @@ // //===----------------------------------------------------------------------===// - //#define KMP_SUPPORT_GRAPH_OUTPUT 1 #include "kmp.h" #include "kmp_io.h" #include "kmp_wait_release.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif #if OMP_40_ENABLED @@ -219,18 +220,19 @@ task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource); #endif -#if OMPT_SUPPORT && OMPT_TRACE - // OMPT tracks dependences between task (a=source, b=sink) in which - // task a blocks the execution of b through the ompt_new_dependence_callback - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + /* OMPT tracks dependences between task (a=source, b=sink) in which + task a blocks the execution of b through the ompt_new_dependence_callback + */ + if (ompt_enabled.ompt_callback_task_dependence) { kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task); kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task); - ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)( - task_source->ompt_task_info.task_id, task_sink->ompt_task_info.task_id); + ompt_callbacks.ompt_callback(ompt_callback_task_dependence)( + &(task_source->ompt_task_info.task_data), + &(task_sink->ompt_task_info.task_data)); } -#endif /* OMPT_SUPPORT && OMPT_TRACE */ +#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */ } template @@ -472,10 +474,29 @@ kmp_info_t *thread = __kmp_threads[gtid]; kmp_taskdata_t *current_task = thread->th.th_current_task; -#if OMPT_SUPPORT && OMPT_TRACE +#if OMPT_SUPPORT + OMPT_STORE_RETURN_ADDRESS(gtid); + + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_task_create) { + kmp_taskdata_t *parent = new_taskdata->td_parent; + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? 
&(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1, + OMPT_LOAD_RETURN_ADDRESS(gtid)); + } + + new_taskdata->ompt_task_info.frame.reenter_runtime_frame = + OMPT_GET_FRAME_ADDRESS(0); + } + +#if OMPT_OPTIONAL /* OMPT grab all dependences if requested by the tool */ - if (ompt_enabled && ndeps + ndeps_noalias > 0 && - ompt_callbacks.ompt_callback(ompt_event_task_dependences)) { + if (ndeps + ndeps_noalias > 0 && + ompt_enabled.ompt_callback_task_dependences) { kmp_int32 i; new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias; @@ -511,8 +532,17 @@ new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags = ompt_task_dependence_type_in; } + ompt_callbacks.ompt_callback(ompt_callback_task_dependences)( + &(new_taskdata->ompt_task_info.task_data), + new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps); + /* We can now free the allocated memory for the dependencies */ + /* For OMPD we might want to delay the free until task_end */ + KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps); + new_taskdata->ompt_task_info.deps = NULL; + new_taskdata->ompt_task_info.ndeps = 0; } -#endif /* OMPT_SUPPORT && OMPT_TRACE */ +#endif /* OMPT_OPTIONAL */ +#endif /* OMPT_SUPPORT */ bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || @@ -559,7 +589,7 @@ "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref, new_taskdata)); - return __kmpc_omp_task(loc_ref, gtid, new_task); + return __kmp_omp_task(gtid, new_task, true); } /*! Index: runtime/src/kmp_tasking.cpp =================================================================== --- runtime/src/kmp_tasking.cpp +++ runtime/src/kmp_tasking.cpp @@ -2,7 +2,6 @@ * kmp_tasking.cpp -- OpenMP 3.0 tasking support. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_i18n.h" #include "kmp_itt.h" @@ -448,40 +446,77 @@ KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata)); + return; +} + #if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_begin)( - parent ? parent->ompt_task_info.task_id : ompt_task_id_none, - parent ? &(parent->ompt_task_info.frame) : NULL, - taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function); - } -#endif -#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE - /* OMPT emit all dependences if requested by the tool */ - if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 && - ompt_callbacks.ompt_callback(ompt_event_task_dependences)) { - ompt_callbacks.ompt_callback(ompt_event_task_dependences)( - taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps, - taskdata->ompt_task_info.ndeps); - /* We can now free the allocated memory for the dependencies */ - KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps); - taskdata->ompt_task_info.deps = NULL; - taskdata->ompt_task_info.ndeps = 0; - } -#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ +//------------------------------------------------------------------------------ +// __ompt_task_init: +// Initialize OMPT fields maintained by a task. 
This will only be called after +// ompt_start_tool, so we already know whether ompt is enabled or not. + +static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) { + // The calls to __ompt_task_init already have the ompt_enabled condition. + task->ompt_task_info.task_data.value = 0; + task->ompt_task_info.frame.exit_runtime_frame = NULL; + task->ompt_task_info.frame.reenter_runtime_frame = NULL; +#if OMP_40_ENABLED + task->ompt_task_info.ndeps = 0; + task->ompt_task_info.deps = NULL; +#endif /* OMP_40_ENABLED */ +} - return; +// __ompt_task_start: +// Build and trigger task-begin event +static inline void __ompt_task_start(kmp_task_t *task, + kmp_taskdata_t *current_task, + kmp_int32 gtid) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + ompt_task_status_t status = ompt_task_others; + if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) { + status = ompt_task_yield; + __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0; + } + /* let OMPT know that we're about to run this task */ + if (ompt_enabled.ompt_callback_task_schedule) { + ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( + &(current_task->ompt_task_info.task_data), status, + &(taskdata->ompt_task_info.task_data)); + } + taskdata->ompt_task_info.scheduling_parent = current_task; } -// __kmpc_omp_task_begin_if0: report that a given serialized task has started -// execution -// -// loc_ref: source location information; points to beginning of task block. -// gtid: global thread number. -// task: task thunk for the started task. -void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task) { +// __ompt_task_finish: +// Build and trigger final task-schedule event +static inline void __ompt_task_finish(kmp_task_t *task, + kmp_taskdata_t *resumed_task) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + ompt_task_status_t status = ompt_task_complete; + if (taskdata->td_flags.tiedness == TASK_UNTIED && + KMP_TEST_THEN_ADD32(&(taskdata->td_untied_count), 0) > 1) + status = ompt_task_others; + if (__kmp_omp_cancellation && taskdata->td_taskgroup && + taskdata->td_taskgroup->cancel_request == cancel_taskgroup) { + status = ompt_task_cancel; + } + + /* let OMPT know that we're returning to the callee task */ + if (ompt_enabled.ompt_callback_task_schedule) { + ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( + &(taskdata->ompt_task_info.task_data), status, + &((resumed_task ? resumed_task + : (taskdata->ompt_task_info.scheduling_parent + ? taskdata->ompt_task_info.scheduling_parent + : taskdata->td_parent)) + ->ompt_task_info.task_data)); + } +} +#endif + +template +static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task, void *frame_address, + void *return_address) { kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; @@ -500,12 +535,61 @@ taskdata->td_flags.task_serial = 1; // Execute this task immediately, not deferred. 
+ __kmp_task_start(gtid, task, current_task); +#if OMPT_SUPPORT + if(ompt) + { + if (current_task->ompt_task_info.frame.reenter_runtime_frame == NULL) { + current_task->ompt_task_info.frame.reenter_runtime_frame = + taskdata->ompt_task_info.frame.exit_runtime_frame = frame_address; + } + if (ompt_enabled.ompt_callback_task_create) { + ompt_task_info_t *parent_info = &(current_task->ompt_task_info); + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + &(parent_info->task_data), &(parent_info->frame), + &(taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0, + return_address); + } + __ompt_task_start(task, current_task, gtid); + } +#endif // OMPT_SUPPORT + KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid, loc_ref, taskdata)); +} - return; +#if OMPT_SUPPORT +OMPT_NOINLINE +static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task, void *frame_address, + void *return_address) { + return __kmpc_omp_task_begin_if0_template(loc_ref, gtid, task, + frame_address, return_address); +} +#endif // OMPT_SUPPORT + +// __kmpc_omp_task_begin_if0: report that a given serialized task has started +// execution +// +// loc_ref: source location information; points to beginning of task block. +// gtid: global thread number. +// task: task thunk for the started task. +void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { +#if OMPT_SUPPORT + if (UNLIKELY(ompt_enabled.enabled)) { + OMPT_STORE_RETURN_ADDRESS(gtid); + __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task, + OMPT_GET_FRAME_ADDRESS(1), + OMPT_LOAD_RETURN_ADDRESS(gtid)); + return; + } +#endif + return __kmpc_omp_task_begin_if0_template(loc_ref, gtid, task, + NULL, NULL); } #ifdef TASK_UNUSED @@ -625,14 +709,6 @@ thread->th.th_task_team; // might be NULL for serial teams... kmp_int32 children = 0; -#if OMPT_SUPPORT - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_end)( - taskdata->ompt_task_info.task_id); - } -#endif - KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming " "task %p\n", gtid, taskdata, resumed_task)); @@ -762,23 +838,55 @@ return; } -// __kmpc_omp_task_complete_if0: report that a task has completed execution -// -// loc_ref: source location information; points to end of task block. -// gtid: global thread number. -// task: task thunk for the completed task. 
-void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, - kmp_task_t *task) { +template +static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); + // this routine will provide task to resume __kmp_task_finish(gtid, task, NULL); KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); + +#if OMPT_SUPPORT + if(ompt) + { + __ompt_task_finish(task, NULL); + ompt_frame_t *ompt_frame; + __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); + ompt_frame->reenter_runtime_frame = NULL; + } +#endif + return; } +#if OMPT_SUPPORT +OMPT_NOINLINE +void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { + __kmpc_omp_task_complete_if0_template(loc_ref, gtid, task); +} +#endif // OMPT_SUPPORT + +// __kmpc_omp_task_complete_if0: report that a task has completed execution +// +// loc_ref: source location information; points to end of task block. +// gtid: global thread number. +// task: task thunk for the completed task. +void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { +#if OMPT_SUPPORT + if (UNLIKELY(ompt_enabled.enabled)) { + __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task); + return; + } +#endif + __kmpc_omp_task_complete_if0_template(loc_ref, gtid, task); +} + #ifdef TASK_UNUSED // __kmpc_omp_task_complete: report that a task has completed execution // NEVER GENERATED BY COMPILER, DEPRECATED!!! @@ -795,25 +903,6 @@ } #endif // TASK_UNUSED -#if OMPT_SUPPORT -// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will -// only be called after ompt_tool, so we already know whether ompt is enabled -// or not. 
-static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid, - void *function) { - if (ompt_enabled) { - task->ompt_task_info.task_id = __ompt_task_id_new(tid); - task->ompt_task_info.function = function; - task->ompt_task_info.frame.exit_runtime_frame = NULL; - task->ompt_task_info.frame.reenter_runtime_frame = NULL; -#if OMP_40_ENABLED - task->ompt_task_info.ndeps = 0; - task->ompt_task_info.deps = NULL; -#endif /* OMP_40_ENABLED */ - } -} -#endif - // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit // task for a given thread // @@ -878,7 +967,8 @@ } #if OMPT_SUPPORT - __kmp_task_init_ompt(task, tid, NULL); + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_init(task, tid); #endif KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, @@ -1123,7 +1213,8 @@ ANNOTATE_HAPPENS_BEFORE(task); #if OMPT_SUPPORT - __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry); + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_init(taskdata, gtid); #endif return task; @@ -1209,7 +1300,7 @@ if (taskdata->td_flags.proxy != TASK_PROXY) { #endif ANNOTATE_HAPPENS_AFTER(task); - __kmp_task_start(gtid, task, current_task); + __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded #if OMP_45_ENABLED } #endif @@ -1217,14 +1308,16 @@ #if OMPT_SUPPORT ompt_thread_info_t oldInfo; kmp_info_t *thread; - if (ompt_enabled) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { // Store the threads states and restore them after the task thread = __kmp_threads[gtid]; oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; + thread->th.ompt_thread_info.state = (thread->th.th_team_serialized) + ? omp_state_work_serial + : omp_state_work_parallel; taskdata->ompt_task_info.frame.exit_runtime_frame = - __builtin_frame_address(0); + OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -1238,6 +1331,18 @@ kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_data_t *task_data; + if (__builtin_expect(ompt_enabled.ompt_callback_cancel, 0)) { + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); + ompt_callbacks.ompt_callback(ompt_callback_cancel)( + task_data, + ((taskgroup && taskgroup->cancel_request) ? 
ompt_cancel_taskgroup + : ompt_cancel_parallel) | + ompt_cancel_discarded_task, + NULL); + } +#endif KMP_COUNT_BLOCK(TASK_cancelled); // this task belongs to a task group and we need to cancel it discard = 1 /* true */; @@ -1272,13 +1377,10 @@ #endif // KMP_STATS_ENABLED #endif // OMP_40_ENABLED -#if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're about to run this task */ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - current_task->ompt_task_info.task_id, - taskdata->ompt_task_info.task_id); - } +// OMPT task begin +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_start(task, current_task, gtid); #endif #ifdef KMP_GOMP_COMPAT @@ -1291,21 +1393,16 @@ } KMP_POP_PARTITIONED_TIMER(); -#if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're returning to the callee task */ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - taskdata->ompt_task_info.task_id, - current_task->ompt_task_info.task_id); - } +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_finish(task, current_task); #endif - #if OMP_40_ENABLED } #endif // OMP_40_ENABLED #if OMPT_SUPPORT - if (ompt_enabled) { + if (__builtin_expect(ompt_enabled.enabled, 0)) { thread->th.ompt_thread_info = oldInfo; taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; } @@ -1316,7 +1413,7 @@ if (taskdata->td_flags.proxy != TASK_PROXY) { #endif ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); - __kmp_task_finish(gtid, task, current_task); + __kmp_task_finish(gtid, task, current_task); // OMPT only if not discarded #if OMP_45_ENABLED } #endif @@ -1354,6 +1451,21 @@ KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); +#if OMPT_SUPPORT + kmp_taskdata_t *parent; + if (__builtin_expect(ompt_enabled.enabled, 0)) { + parent = new_taskdata->td_parent; + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? &(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0, + OMPT_GET_RETURN_ADDRESS(0)); + } + } +#endif + /* Should we execute the new task or queue it? For now, let's just always try to queue it. If the queue fills up, then we'll execute it. */ @@ -1371,6 +1483,11 @@ gtid, loc_ref, new_taskdata)); ANNOTATE_HAPPENS_BEFORE(new_task); +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) { + parent->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +#endif return TASK_CURRENT_NOT_QUEUED; } @@ -1389,13 +1506,6 @@ bool serialize_immediate) { kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); -#if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(1); - } -#endif - /* Should we execute the new task or queue it? For now, let's just always try to queue it. If the queue fills up, then we'll execute it. 
*/ #if OMP_45_ENABLED @@ -1411,12 +1521,6 @@ __kmp_invoke_task(gtid, new_task, current_task); } -#if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; - } -#endif - ANNOTATE_HAPPENS_BEFORE(new_task); return TASK_CURRENT_NOT_QUEUED; } @@ -1438,23 +1542,50 @@ kmp_int32 res; KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); -#if KMP_DEBUG +#if KMP_DEBUG || OMPT_SUPPORT kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); #endif KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); +#if OMPT_SUPPORT + kmp_taskdata_t *parent = NULL; + if (__builtin_expect(ompt_enabled.enabled && !new_taskdata->td_flags.started, + 0)) { + OMPT_STORE_RETURN_ADDRESS(gtid); + parent = new_taskdata->td_parent; + if (!parent->ompt_task_info.frame.reenter_runtime_frame) + parent->ompt_task_info.frame.reenter_runtime_frame = + OMPT_GET_FRAME_ADDRESS(1); + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? &(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, + OMPT_LOAD_RETURN_ADDRESS(gtid)); + } + } +#endif + res = __kmp_omp_task(gtid, new_task, true); KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata)); +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled && parent != NULL, 0)) { + parent->ompt_task_info.frame.reenter_runtime_frame = NULL; + } +#endif return res; } -// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are -// complete -kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { +template +static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, + void *frame_address, + void *return_address) { kmp_taskdata_t *taskdata; kmp_info_t *thread; int thread_finished = FALSE; @@ -1465,23 +1596,30 @@ if (__kmp_tasking_mode != tskm_immediate_exec) { thread = __kmp_threads[gtid]; taskdata = thread->th.th_current_task; -#if OMPT_SUPPORT && OMPT_TRACE - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; - - if (ompt_enabled) { - kmp_team_t *team = thread->th.th_team; - my_task_id = taskdata->ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - - taskdata->ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(1); - if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id, - my_task_id); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_data_t *my_task_data; + ompt_data_t *my_parallel_data; + + if(ompt){ + my_task_data = &(taskdata->ompt_task_info.task_data); + my_parallel_data = OMPT_CUR_TEAM_DATA(thread); + + taskdata->ompt_task_info.frame.reenter_runtime_frame = frame_address; + + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); + } + + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, + my_task_data, return_address); } } -#endif +#endif // OMPT_SUPPORT && OMPT_OPTIONAL // Debugger: The taskwait is active. 
Store location and thread encountered the // taskwait. @@ -1524,15 +1662,22 @@ // negated. taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id, - my_task_id); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if(ompt){ + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, + my_task_data, return_address); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, + my_task_data, return_address); } taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; } -#endif +#endif // OMPT_SUPPORT && OMPT_OPTIONAL + ANNOTATE_HAPPENS_AFTER(taskdata); } @@ -1543,6 +1688,30 @@ return TASK_CURRENT_NOT_QUEUED; } +#if OMPT_SUPPORT +OMPT_NOINLINE +static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid, + void *frame_address, + void *return_address) { + return __kmpc_omp_taskwait_template(loc_ref, gtid, frame_address, + return_address); +} +#endif // OMPT_SUPPORT + +// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are +// complete +kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (UNLIKELY(ompt_enabled.enabled)) { + OMPT_STORE_RETURN_ADDRESS(gtid); + return __kmpc_omp_taskwait_template(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1), + OMPT_LOAD_RETURN_ADDRESS(gtid)); + } +#endif + return __kmpc_omp_taskwait_template(loc_ref, gtid, NULL, + NULL); +} + // __kmpc_omp_taskyield: switch to a different task kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { kmp_taskdata_t *taskdata; @@ -1577,10 +1746,18 @@ kmp_task_team_t *task_team = thread->th.th_task_team; if (task_team != NULL) { if (KMP_TASKING_ENABLED(task_team)) { +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + thread->th.ompt_thread_info.ompt_task_yielded = 1; +#endif __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint); +#if OMPT_SUPPORT + if (__builtin_expect(ompt_enabled.enabled, 0)) + thread->th.ompt_thread_info.ompt_task_yielded = 0; +#endif } } } @@ -1811,6 +1988,22 @@ tg_new->reduce_num_data = 0; #endif taskdata->td_taskgroup = tg_new; + +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region, 0)) { + void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + kmp_team_t *team = thread->th.th_team; + ompt_data_t my_task_data = taskdata->ompt_task_info.task_data; + // FIXME: I think this is wrong for lwt! 
+ ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data; + + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif } // __kmpc_end_taskgroup: Wait until all tasks generated by the current task @@ -1821,6 +2014,22 @@ kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; int thread_finished = FALSE; +#if OMPT_SUPPORT && OMPT_OPTIONAL + kmp_team_t *team; + ompt_data_t my_task_data; + ompt_data_t my_parallel_data; + void *codeptr; + if (__builtin_expect(ompt_enabled.enabled, 0)) { + team = thread->th.th_team; + my_task_data = taskdata->ompt_task_info.task_data; + // FIXME: I think this is wrong for lwt! + my_parallel_data = team->t.ompt_team_info.parallel_data; + codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); + if (!codeptr) + codeptr = OMPT_GET_RETURN_ADDRESS(0); + } +#endif + KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc)); KMP_DEBUG_ASSERT(taskgroup != NULL); KMP_SET_THREAD_STATE_BLOCK(TASKGROUP); @@ -1834,6 +2043,14 @@ __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region_wait, 0)) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif + #if OMP_45_ENABLED if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && @@ -1850,6 +2067,14 @@ } } +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region_wait, 0)) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif + #if USE_ITT_BUILD if (itt_sync_obj != NULL) __kmp_itt_taskwait_finished(gtid, itt_sync_obj); @@ -1869,6 +2094,14 @@ KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata)); ANNOTATE_HAPPENS_AFTER(taskdata); + +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (__builtin_expect(ompt_enabled.ompt_callback_sync_region, 0)) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), + &(my_task_data), codeptr); + } +#endif } #endif @@ -3257,8 +3490,8 @@ ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n", thread, taskdata, taskdata->td_parent)); #if OMPT_SUPPORT - __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, - (void *)task->routine); + if (__builtin_expect(ompt_enabled.enabled, 0)) + __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid); #endif return task; } @@ -3541,8 +3774,24 @@ "grain %llu(%d), dup %p\n", gtid, taskdata, *lb, *ub, st, grainsize, sched, task_dup)); - if (nogroup == 0) +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), + 0, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif + + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_taskgroup(loc, gtid); + } // ========================================================================= // calculate loop parameters @@ -3616,6 +3865,9 @@ if (if_val == 0) { // 
if(0) specified, mark task as serial taskdata->td_flags.task_serial = 1; taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif // always start serial tasks linearly __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, task_dup); @@ -3623,18 +3875,37 @@ KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu" "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, num_tasks_min, task_dup); } else { KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu" "(%lld), grain %llu, extras %llu\n", gtid, tc, num_tasks, num_tasks_min, grainsize, extras)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks, grainsize, extras, tc, task_dup); } - if (nogroup == 0) + if (nogroup == 0) { +#if OMPT_SUPPORT && OMPT_OPTIONAL + OMPT_STORE_RETURN_ADDRESS(gtid); +#endif __kmpc_end_taskgroup(loc, gtid); + } +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data), + &(task_info->task_data), + 0, + OMPT_GET_RETURN_ADDRESS(0)); + } +#endif KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid)); } Index: runtime/src/kmp_taskq.cpp =================================================================== --- runtime/src/kmp_taskq.cpp +++ runtime/src/kmp_taskq.cpp @@ -2,7 +2,6 @@ * kmp_taskq.cpp -- TASKQ support for OpenMP. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_error.h" #include "kmp_i18n.h" Index: runtime/src/kmp_threadprivate.cpp =================================================================== --- runtime/src/kmp_threadprivate.cpp +++ runtime/src/kmp_threadprivate.cpp @@ -2,7 +2,6 @@ * kmp_threadprivate.cpp -- OpenMP threadprivate support library */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_i18n.h" #include "kmp_itt.h" Index: runtime/src/kmp_utility.cpp =================================================================== --- runtime/src/kmp_utility.cpp +++ runtime/src/kmp_utility.cpp @@ -2,7 +2,6 @@ * kmp_utility.cpp -- Utility routines for the OpenMP support library. 
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_i18n.h" #include "kmp_str.h" Index: runtime/src/kmp_version.h =================================================================== --- runtime/src/kmp_version.h +++ runtime/src/kmp_version.h @@ -2,7 +2,6 @@ * kmp_version.h -- version number for this release */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_VERSION_H #define KMP_VERSION_H Index: runtime/src/kmp_version.cpp =================================================================== --- runtime/src/kmp_version.cpp +++ runtime/src/kmp_version.cpp @@ -2,7 +2,6 @@ * kmp_version.cpp */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_io.h" #include "kmp_version.h" Index: runtime/src/kmp_wait_release.h =================================================================== --- runtime/src/kmp_wait_release.h +++ runtime/src/kmp_wait_release.h @@ -2,7 +2,6 @@ * kmp_wait_release.h -- Wait/Release implementation */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,13 +11,15 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_WAIT_RELEASE_H #define KMP_WAIT_RELEASE_H #include "kmp.h" #include "kmp_itt.h" #include "kmp_stats.h" +#if OMPT_SUPPORT +#include "ompt-specific.h" +#endif /*! @defgroup WAIT_RELEASE Wait/Release operations @@ -87,6 +88,44 @@ */ }; +#if OMPT_SUPPORT +static inline void __ompt_implicit_task_end(kmp_info_t *this_thr, + omp_state_t omp_state, + ompt_data_t *tId, + ompt_data_t *pId) { + int ds_tid = this_thr->th.th_info.ds.ds_tid; + if (omp_state == omp_state_wait_barrier_implicit) { + this_thr->th.ompt_thread_info.state = omp_state_overhead; +#if OMPT_OPTIONAL + void *codeptr = NULL; + if (ompt_enabled.ompt_callback_sync_region_wait) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } + if (ompt_enabled.ompt_callback_sync_region) { + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( + ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr); + } +#endif + if (!KMP_MASTER_TID(ds_tid)) { + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, tId, 0, ds_tid); + } +#if OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_idle) { + ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin); + } +#endif + // return to idle state + this_thr->th.ompt_thread_info.state = omp_state_idle; + } else { + this_thr->th.ompt_thread_info.state = omp_state_overhead; + } + } +} +#endif + /* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_* must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! 
*/ @@ -118,30 +157,88 @@ stats_state_e thread_state = KMP_GET_THREAD_STATE(); #endif -#if OMPT_SUPPORT && OMPT_BLAME - ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state; - if (ompt_enabled && ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) { - ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - +/* OMPT Behavior: +THIS function is called from + __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions) + these have join / fork behavior + + In these cases, we don't change the state or trigger events in THIS +function. + Events are triggered in the calling code (__kmp_barrier): + + state := omp_state_overhead + barrier-begin + barrier-wait-begin + state := omp_state_wait_barrier + call join-barrier-implementation (finally arrive here) + {} + call fork-barrier-implementation (finally arrive here) + {} + state := omp_state_overhead + barrier-wait-end + barrier-end + state := omp_state_work_parallel + + + __kmp_fork_barrier (after thread creation, before executing implicit task) + call fork-barrier-implementation (finally arrive here) + {} // worker arrive here with state = omp_state_idle + + + __kmp_join_barrier (implicit barrier at end of parallel region) + state := omp_state_barrier_implicit + barrier-begin + barrier-wait-begin + call join-barrier-implementation (finally arrive here +final_spin=FALSE) + { + } + __kmp_fork_barrier (implicit barrier at end of parallel region) + call fork-barrier-implementation (finally arrive here final_spin=TRUE) + + Worker after task-team is finished: + barrier-wait-end + barrier-end + implicit-task-end + idle-begin + state := omp_state_idle + + Before leaving, if state = omp_state_idle + idle-end + state := omp_state_overhead +*/ +#if OMPT_SUPPORT + omp_state_t ompt_entry_state; + ompt_data_t *pId = NULL; + ompt_data_t *tId; + if (ompt_enabled.enabled) { + ompt_entry_state = this_thr->th.ompt_thread_info.state; + if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit || + KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) { ompt_lw_taskteam_t *team = this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; if (team) { - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; + pId = &(team->ompt_team_info.parallel_data); + tId = &(team->ompt_task_info.task_data); } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; + pId = OMPT_CUR_TEAM_DATA(this_thr); + tId = OMPT_CUR_TASK_DATA(this_thr); + } + } else { + pId = NULL; + tId = &(this_thr->th.ompt_thread_info.task_data); + } +#if OMPT_OPTIONAL + if (ompt_entry_state == omp_state_idle) { + if (ompt_enabled.ompt_callback_idle) { + ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin); } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId); + } else +#endif + if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec || + this_thr->th.th_task_team == NULL)) { + // implicit task is done. 
Either no taskqueue, or task-team finished + __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId); } } #endif @@ -208,6 +305,11 @@ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; } else { KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); +#if OMPT_SUPPORT + // task-team is done now, other cases should be catched above + if (final_spin && ompt_enabled.enabled) + __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId); +#endif this_thr->th.th_task_team = NULL; this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; } @@ -295,29 +397,22 @@ // TODO: If thread is done with work and times out, disband/free } -#if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) { - ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - - ompt_lw_taskteam_t *team = - this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; - if (team) { - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; - } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; +#if OMPT_SUPPORT + omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state; + if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) { +#if OMPT_OPTIONAL + if (final_spin) { + __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId); + ompt_exit_state = this_thr->th.ompt_thread_info.state; + } +#endif + if (ompt_exit_state == omp_state_idle) { +#if OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_idle) { + ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end); } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId); +#endif + this_thr->th.ompt_thread_info.state = omp_state_overhead; } } #endif Index: runtime/src/kmp_wait_release.cpp =================================================================== --- runtime/src/kmp_wait_release.cpp +++ runtime/src/kmp_wait_release.cpp @@ -2,7 +2,6 @@ * kmp_wait_release.cpp -- Wait/Release implementation */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure Index: runtime/src/kmp_wrapper_getpid.h =================================================================== --- runtime/src/kmp_wrapper_getpid.h +++ runtime/src/kmp_wrapper_getpid.h @@ -2,7 +2,6 @@ * kmp_wrapper_getpid.h -- getpid() declaration. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_WRAPPER_GETPID_H #define KMP_WRAPPER_GETPID_H Index: runtime/src/kmp_wrapper_malloc.h =================================================================== --- runtime/src/kmp_wrapper_malloc.h +++ runtime/src/kmp_wrapper_malloc.h @@ -3,7 +3,6 @@ * (malloc(), free(), and others). 
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -13,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef KMP_WRAPPER_MALLOC_H #define KMP_WRAPPER_MALLOC_H Index: runtime/src/ompt-event-specific.h =================================================================== --- runtime/src/ompt-event-specific.h +++ runtime/src/ompt-event-specific.h @@ -22,132 +22,84 @@ | the OMPT TR. They are exposed to tools through ompt_set_callback. +--------------------------------------------------------------------------*/ -#define ompt_event_NEVER ompt_set_result_event_never_occurs -#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback -#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some -#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always +#define ompt_event_UNIMPLEMENTED ompt_set_never +#define ompt_event_MAY_CONVENIENT ompt_set_sometimes +#define ompt_event_MAY_ALWAYS ompt_set_always -#if OMPT_TRACE -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS +#if OMPT_OPTIONAL +#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_MAY_ALWAYS #else -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED -#endif - -#if OMPT_BLAME -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS -#else -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED +#define ompt_event_MAY_ALWAYS_OPTIONAL ompt_event_UNIMPLEMENTED #endif /*---------------------------------------------------------------------------- | Mandatory Events +--------------------------------------------------------------------------*/ -#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_parallel_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_parallel_end_implemented ompt_event_MAY_ALWAYS -#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_task_create_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_task_schedule_implemented ompt_event_MAY_ALWAYS -#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_thread_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_thread_end_implemented ompt_event_MAY_ALWAYS -#define ompt_event_control_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS -#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS /*---------------------------------------------------------------------------- - | Optional Events (blame shifting) + | Target Related Events (not yet implemented) +--------------------------------------------------------------------------*/ -#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME - -#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_target_submit_implemented 
ompt_event_UNIMPLEMENTED +#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_nest_lock_last_implemented \ - ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED /*---------------------------------------------------------------------------- - | Optional Events (synchronous events) + | Optional Events (blame shifting) +--------------------------------------------------------------------------*/ -#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_idle_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_sync_region_wait_implemented \ + ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED - -#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE - -#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE +/*---------------------------------------------------------------------------- + | Optional Events (synchronous events) + +--------------------------------------------------------------------------*/ -#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_work_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_master_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_release_nest_lock_prev_implemented \ - 
ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_nest_lock_first_implemented \ - ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_nest_lock_next_implemented \ - ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_mutex_acquire_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_mutex_acquired_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_nest_lock_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_lock_init_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_lock_destroy_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_flush_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL #if OMP_40_ENABLED -#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_callback_task_dependences_implemented \ + ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL #else -#define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED #endif /* OMP_40_ENABLED */ #endif Index: runtime/src/ompt-general.cpp =================================================================== --- runtime/src/ompt-general.cpp +++ runtime/src/ompt-general.cpp @@ -8,6 +8,10 @@ #include #include #include +#if KMP_OS_UNIX +#include +#include +#endif /***************************************************************************** * ompt include files @@ -36,8 +40,13 @@ typedef struct { const char *state_name; - ompt_state_t state_id; -} ompt_state_info_t; + omp_state_t state_id; +} omp_state_info_t; + +typedef struct { + const char *name; + ompt_mutex_impl_t id; +} ompt_mutex_impl_info_t; enum tool_setting_e { omp_tool_error, @@ -46,25 +55,32 @@ omp_tool_enabled }; -typedef void (*ompt_initialize_t)(ompt_function_lookup_t ompt_fn_lookup, - const char *version, - unsigned int ompt_version); +typedef int (*ompt_initialize_t)(ompt_function_lookup_t lookup, + struct ompt_fns_t *fns); + +typedef void (*ompt_finalize_t)(struct ompt_fns_t *fns); 
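
[Editorial note, not part of the patch: for orientation, a minimal first-party tool written against this revised interface exports a strong ompt_start_tool that returns an ompt_fns_t whose initialize and finalize members are exactly the ones ompt_post_init() and ompt_fini() below invoke. The sketch assumes the updated <ompt.h> installed by this change (ompt_fns_t, ompt_function_lookup_t, ompt_callbacks_t, ompt_callback_t); set_callback_fn, example_initialize and example_fns are illustrative names, and set_callback_fn merely mirrors the ompt_set_callback signature defined later in this file.]

#include <stdio.h>
#include <ompt.h> /* assumed: the updated header shipped with this patch */

/* Local mirror of the ompt_set_callback entry point's signature. */
typedef int (*set_callback_fn)(ompt_callbacks_t, ompt_callback_t);

static int example_initialize(ompt_function_lookup_t lookup, ompt_fns_t *fns) {
  set_callback_fn set_callback = (set_callback_fn)lookup("ompt_set_callback");
  /* Register callbacks here, e.g. ompt_callback_thread_begin. Returning a
     non-zero value keeps the tool active (stored in ompt_enabled.enabled). */
  (void)set_callback;
  (void)fns;
  return 1;
}

static void example_finalize(ompt_fns_t *fns) {
  (void)fns;
  printf("example tool: runtime shut down\n");
}

static ompt_fns_t example_fns;

/* Strong definition; overrides the weak fallback provided below. */
ompt_fns_t *ompt_start_tool(unsigned int omp_version,
                            const char *runtime_version) {
  example_fns.initialize = example_initialize;
  example_fns.finalize = example_finalize;
  return &example_fns;
}

[Such a tool would be activated either because its strong definition wins over the weak fallback below, or because a shared library exporting it is listed in OMP_TOOL_LIBRARIES and probed by ompt_try_start_tool().]
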
/***************************************************************************** * global variables ****************************************************************************/ -int ompt_enabled = 0; +ompt_callbacks_active_t ompt_enabled; + +omp_state_info_t omp_state_info[] = { +#define omp_state_macro(state, code) {#state, state}, + FOREACH_OMP_STATE(omp_state_macro) +#undef omp_state_macro +}; -ompt_state_info_t ompt_state_info[] = { -#define ompt_state_macro(state, code) {#state, state}, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro +ompt_mutex_impl_info_t ompt_mutex_impl_info[] = { +#define ompt_mutex_impl_macro(name, id) {#name, name}, + FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro) +#undef ompt_mutex_impl_macro }; -ompt_callbacks_t ompt_callbacks; +ompt_callbacks_internal_t ompt_callbacks; -static ompt_initialize_t ompt_initialize_fn = NULL; +static ompt_fns_t *ompt_fns = NULL; /***************************************************************************** * forward declarations @@ -72,48 +88,71 @@ static ompt_interface_fn_t ompt_fn_lookup(const char *s); -OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void); +OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void); /***************************************************************************** * initialization and finalization (private operations) ****************************************************************************/ /* On Unix-like systems that support weak symbols the following implementation - * of ompt_tool() will be used in case no tool-supplied implementation of + * of ompt_start_tool() will be used in case no tool-supplied implementation of * this function is present in the address space of a process. * * On Windows, the ompt_tool_windows function is used to find the * ompt_tool symbol across all modules loaded by a process. If ompt_tool is * found, ompt_tool's return value is used to initialize the tool. Otherwise, * NULL is returned and OMPT won't be enabled */ + +typedef ompt_fns_t *(*ompt_start_tool_t)(unsigned int, const char *); + +#if KMP_OS_UNIX + #if OMPT_HAVE_WEAK_ATTRIBUTE +_OMP_EXTERN __attribute__((weak)) +#elif defined KMP_DYNAMIC_LIB _OMP_EXTERN -__attribute__((weak)) ompt_initialize_t ompt_tool() { +#warning Activation of OMPT is might fail for tools statically linked into the application. +#else +#error Activation of OMPT is not supported on this platform. 
+#endif +ompt_fns_t * +ompt_start_tool(unsigned int omp_version, const char *runtime_version) { +#ifdef KMP_DYNAMIC_LIB + ompt_fns_t *ret = NULL; + // Try next symbol in the address space + ompt_start_tool_t next_tool = NULL; + *(void **)(&next_tool) = dlsym(RTLD_NEXT, "ompt_start_tool"); + if (next_tool) + ret = (next_tool)(omp_version, runtime_version); + return ret; +#else #if OMPT_DEBUG - printf("ompt_tool() is called from the RTL\n"); + printf("ompt_start_tool() is called from the RTL\n"); #endif return NULL; +#endif } #elif OMPT_HAVE_PSAPI #include #pragma comment(lib, "psapi.lib") -#define ompt_tool ompt_tool_windows +#define ompt_start_tool ompt_tool_windows // The number of loaded modules to start enumeration with EnumProcessModules() #define NUM_MODULES 128 -static ompt_initialize_t ompt_tool_windows() { +static ompt_fns_t *ompt_tool_windows(unsigned int omp_version, + const char *runtime_version) { int i; DWORD needed, new_size; HMODULE *modules; HANDLE process = GetCurrentProcess(); modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE)); - ompt_initialize_t (*ompt_tool_p)() = NULL; + ompt_start_tool_t ompt_tool_p = NULL; #if OMPT_DEBUG - printf("ompt_tool_windows(): looking for ompt_tool\n"); + printf("ompt_tool_windows(): looking for ompt_start_tool\n"); #endif if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE), &needed)) { @@ -135,21 +174,22 @@ } } for (i = 0; i < new_size; ++i) { - (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool"); + (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool"); if (ompt_tool_p) { #if OMPT_DEBUG TCHAR modName[MAX_PATH]; if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool found in module %s\n", modName); + printf("ompt_tool_windows(): ompt_start_tool found in module %s\n", + modName); #endif free(modules); - return ompt_tool_p(); + return (*ompt_tool_p)(omp_version, runtime_version); } #if OMPT_DEBUG else { TCHAR modName[MAX_PATH]; if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool not found in module %s\n", + printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n", modName); } #endif @@ -161,6 +201,49 @@ #error Either __attribute__((weak)) or psapi.dll are required for OMPT support #endif // OMPT_HAVE_WEAK_ATTRIBUTE +static ompt_fns_t *ompt_try_start_tool(unsigned int omp_version, + const char *runtime_version) { + ompt_fns_t *ret = NULL; + ompt_start_tool_t start_tool = NULL; +#if KMP_OS_WINDOWS + // Cannot use colon to describe a list of absolute paths on Windows + const char *sep = ";"; +#else + const char *sep = ":"; +#endif + + // Try in the current address space + if ((ret = ompt_start_tool(omp_version, runtime_version))) + return ret; + + // Try tool-libraries-var ICV + const char *tool_libs = getenv("OMP_TOOL_LIBRARIES"); + if (tool_libs) { + const char *libs = __kmp_str_format("%s", tool_libs); + char *buf; + char *fname = __kmp_str_token(CCAST(char *, libs), sep, &buf); + while (fname) { +#if KMP_OS_UNIX + void *h = dlopen(fname, RTLD_LAZY); + if (h) { + *(void **)(&start_tool) = dlsym(h, "ompt_start_tool"); +#elif KMP_OS_WINDOWS + HMODULE h = LoadLibrary(fname); + if (h) { + start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool"); +#else +#error Activation of OMPT is not supported on this platform. 
+#endif + if (start_tool && (ret = (*start_tool)(omp_version, runtime_version))) + break; + } + fname = __kmp_str_token(NULL, sep, &buf); + } + __kmp_str_free(&libs); + } + return ret; +} + void ompt_pre_init() { //-------------------------------------------------- // Execute the pre-initialization logic only once. @@ -194,10 +277,14 @@ case omp_tool_unset: case omp_tool_enabled: - ompt_initialize_fn = ompt_tool(); - if (ompt_initialize_fn) { - ompt_enabled = 1; - } + + //-------------------------------------------------- + // Load tool iff specified in environment variable + //-------------------------------------------------- + ompt_fns = + ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version()); + + memset(&ompt_enabled, 0, sizeof(ompt_enabled)); break; case omp_tool_error: @@ -226,31 +313,34 @@ //-------------------------------------------------- // Initialize the tool if so indicated. //-------------------------------------------------- - if (ompt_enabled) { - ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(), - OMPT_VERSION); + if (ompt_fns) { + ompt_enabled.enabled = !!ompt_fns->initialize(ompt_fn_lookup, ompt_fns); ompt_thread_t *root_thread = ompt_get_thread(); - ompt_set_thread_state(root_thread, ompt_state_overhead); + ompt_set_thread_state(root_thread, omp_state_overhead); - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - ompt_callbacks.ompt_callback(ompt_event_thread_begin)( - ompt_thread_initial, ompt_get_thread_id()); + if (ompt_enabled.ompt_callback_thread_begin) { + ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( + ompt_thread_initial, __ompt_get_thread_data_internal()); + } + ompt_data_t *task_data; + __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); + if (ompt_enabled.ompt_callback_task_create) { + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + NULL, NULL, task_data, ompt_task_initial, 0, NULL); } - ompt_set_thread_state(root_thread, ompt_state_work_serial); + ompt_set_thread_state(root_thread, omp_state_work_serial); } } void ompt_fini() { - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) { - ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)(); - } + if (ompt_enabled.enabled) { + ompt_fns->finalize(ompt_fns); } - ompt_enabled = 0; + memset(&ompt_enabled, 0, sizeof(ompt_enabled)); } /***************************************************************************** @@ -261,15 +351,15 @@ * state ****************************************************************************/ -OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state, - const char **next_state_name) { - const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); +OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state, + const char **next_state_name) { + const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t); int i = 0; for (i = 0; i < len - 1; i++) { - if (ompt_state_info[i].state_id == current_state) { - *next_state = ompt_state_info[i + 1].state_id; - *next_state_name = ompt_state_info[i + 1].state_name; + if (omp_state_info[i].state_id == current_state) { + *next_state = omp_state_info[i + 1].state_id; + *next_state_name = omp_state_info[i + 1].state_name; return 1; } } @@ -277,17 +367,35 @@ return 0; } +OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl, + int *next_impl, + const char **next_impl_name) { + const static int len = + sizeof(ompt_mutex_impl_info) / sizeof(ompt_mutex_impl_info_t); + 
int i = 0; + for (i = 0; i < len - 1; i++) { + if (ompt_mutex_impl_info[i].id != current_impl) + continue; + *next_impl = ompt_mutex_impl_info[i + 1].id; + *next_impl_name = ompt_mutex_impl_info[i + 1].name; + return 1; + } + return 0; +} + /***************************************************************************** * callbacks ****************************************************************************/ -OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) { - switch (evid) { +OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which, + ompt_callback_t callback) { + switch (which) { #define ompt_event_macro(event_name, callback_type, event_id) \ case event_name: \ if (ompt_event_implementation_status(event_name)) { \ - ompt_callbacks.ompt_callback(event_name) = (callback_type)cb; \ + ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \ + ompt_enabled.event_name = 1; \ } \ return ompt_event_implementation_status(event_name); @@ -296,12 +404,13 @@ #undef ompt_event_macro default: - return ompt_set_result_registration_error; + return ompt_set_error; } } -OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) { - switch (evid) { +OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, + ompt_callback_t *callback) { + switch (which) { #define ompt_event_macro(event_name, callback_type, event_id) \ case event_name: \ @@ -309,7 +418,7 @@ ompt_callback_t mycb = \ (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ if (mycb) { \ - *cb = mycb; \ + *callback = mycb; \ return ompt_get_callback_success; \ } \ } \ @@ -328,54 +437,149 @@ * parallel regions ****************************************************************************/ -OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) { - return __ompt_get_parallel_id_internal(ancestor_level); -} - -OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) { - return __ompt_get_parallel_team_size_internal(ancestor_level); -} - -OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) { - return __ompt_get_parallel_function_internal(ancestor_level); +OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level, + ompt_data_t **parallel_data, + int *team_size) { + return __ompt_get_parallel_info_internal(ancestor_level, parallel_data, + team_size); } -OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) { - ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id); +OMPT_API_ROUTINE omp_state_t ompt_get_state(ompt_wait_id_t *wait_id) { + omp_state_t thread_state = __ompt_get_state_internal(wait_id); - if (thread_state == ompt_state_undefined) { - thread_state = ompt_state_work_serial; + if (thread_state == omp_state_undefined) { + thread_state = omp_state_work_serial; } return thread_state; } /***************************************************************************** - * threads + * tasks ****************************************************************************/ -OMPT_API_ROUTINE void *ompt_get_idle_frame() { - return __ompt_get_idle_frame_internal(); +OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) { + return __ompt_get_thread_data_internal(); +} + +OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num) { + return __ompt_get_task_info_internal(ancestor_level, type, task_data, + task_frame, parallel_data, thread_num); } 
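
[Editorial note, not part of the patch: as a hedged usage sketch, a tool that has captured this entry point through the lookup callback at initialization time can walk the task ancestry level by level. The get_task_info_fn typedef and print_task_ancestry helper below are illustrative names; the typedef mirrors the ompt_get_task_info signature above, and the sketch assumes, in line with the return convention of the parallel-info routine, that a zero return means no task exists at the requested ancestor level.]

#include <stdio.h>
#include <ompt.h> /* assumed: declares ompt_data_t and ompt_frame_t */

/* Local mirror of the ompt_get_task_info signature defined above. */
typedef int (*get_task_info_fn)(int, int *, ompt_data_t **, ompt_frame_t **,
                                ompt_data_t **, int *);

/* Print the task ancestry, e.g. from inside a tool callback. */
static void print_task_ancestry(get_task_info_fn get_task_info) {
  for (int level = 0;; ++level) {
    int type = 0, thread_num = 0;
    ompt_data_t *task_data = NULL, *parallel_data = NULL;
    ompt_frame_t *frame = NULL;
    if (!get_task_info(level, &type, &task_data, &frame, &parallel_data,
                       &thread_num))
      break; /* no task at this ancestor level */
    printf("level %d: type=%d thread_num=%d task_data=%llu\n", level, type,
           thread_num, task_data ? (unsigned long long)task_data->value : 0ULL);
  }
}

[The ompt_data_t arguments are returned by reference, so the tool can read and update its per-task word (the .value member initialized elsewhere in this patch) without a further lookup.]
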
/***************************************************************************** - * tasks + * places ****************************************************************************/ -OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) { - return __ompt_get_thread_id_internal(); +OMPT_API_ROUTINE int ompt_get_num_places(void) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return 0; +#else + if (!KMP_AFFINITY_CAPABLE()) + return 0; + return __kmp_affinity_num_masks; +#endif +} + +OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size, + int *ids) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return 0; +#else + int i, count; + int tmp_ids[ids_size]; + if (!KMP_AFFINITY_CAPABLE()) + return 0; + if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) + return 0; + /* TODO: Is this safe for asynchronous call from signal handler during runtime + * shutdown? */ + kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); + count = 0; + KMP_CPU_SET_ITERATE(i, mask) { + if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || + (!KMP_CPU_ISSET(i, mask))) { + continue; + } + if (count < ids_size) + tmp_ids[count] = i; + count++; + } + if (ids_size >= count) { + for (i = 0; i < count; i++) { + ids[i] = tmp_ids[i]; + } + } + return count; +#endif } -OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) { - return __ompt_get_task_id_internal(depth); +OMPT_API_ROUTINE int ompt_get_place_num(void) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return -1; +#else + int gtid; + kmp_info_t *thread; + if (!KMP_AFFINITY_CAPABLE()) + return -1; + gtid = __kmp_entry_gtid(); + thread = __kmp_thread_from_gtid(gtid); + if (thread == NULL || thread->th.th_current_place < 0) + return -1; + return thread->th.th_current_place; +#endif } -OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) { - return __ompt_get_task_frame_internal(depth); +OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size, + int *place_nums) { +// copied from kmp_ftn_entry.h (but modified) +#if !KMP_AFFINITY_SUPPORTED + return 0; +#else + int i, gtid, place_num, first_place, last_place, start, end; + kmp_info_t *thread; + if (!KMP_AFFINITY_CAPABLE()) + return 0; + gtid = __kmp_entry_gtid(); + thread = __kmp_thread_from_gtid(gtid); + if (thread == NULL) + return 0; + first_place = thread->th.th_first_place; + last_place = thread->th.th_last_place; + if (first_place < 0 || last_place < 0) + return 0; + if (first_place <= last_place) { + start = first_place; + end = last_place; + } else { + start = last_place; + end = first_place; + } + if (end - start <= place_nums_size) + for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) { + place_nums[i] = place_num; + } + return end - start; +#endif } -OMPT_API_ROUTINE void *ompt_get_task_function(int depth) { - return __ompt_get_task_function_internal(depth); +/***************************************************************************** + * places + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_get_proc_id(void) { +#if KMP_OS_LINUX + return sched_getcpu(); +#else + return -1; +#endif } /***************************************************************************** @@ -435,28 +639,59 @@ OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; } /***************************************************************************** - * application-facing API +* 
application-facing API ****************************************************************************/ /*---------------------------------------------------------------------------- | control ---------------------------------------------------------------------------*/ -_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) { - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) { - ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier); +int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) { + + if (ompt_enabled.enabled) { + if (ompt_enabled.ompt_callback_control_tool) { + return ompt_callbacks.ompt_callback(ompt_callback_control_tool)( + command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid())); + } else { + return -1; + } + } else { + return -2; } } /***************************************************************************** + * misc + ****************************************************************************/ + +OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) { + return __ompt_get_unique_id_internal(); +} + +/***************************************************************************** + * Target + ****************************************************************************/ + +OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id) { + return 0; // thread is not in a target region +} + +OMPT_API_ROUTINE int ompt_get_num_devices(void) { + return 1; // only one device (the current device) is available +} + +/***************************************************************************** * API inquiry for tool ****************************************************************************/ static ompt_interface_fn_t ompt_fn_lookup(const char *s) { #define ompt_interface_fn(fn) \ + fn##_t fn##_f = fn; \ if (strcmp(s, #fn) == 0) \ - return (ompt_interface_fn_t)fn; + return (ompt_interface_fn_t)fn##_f; FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) Index: runtime/src/ompt-internal.h =================================================================== --- runtime/src/ompt-internal.h +++ runtime/src/ompt-internal.h @@ -13,19 +13,38 @@ #define ompt_callback(e) e##_callback -typedef struct ompt_callbacks_s { +typedef struct ompt_callbacks_internal_s { #define ompt_event_macro(event, callback, eventid) \ callback ompt_callback(event); FOREACH_OMPT_EVENT(ompt_event_macro) #undef ompt_event_macro -} ompt_callbacks_t; +} ompt_callbacks_internal_t; + +typedef struct ompt_callbacks_active_s { + unsigned int enabled : 1; +#define ompt_event_macro(event, callback, eventid) unsigned int event : 1; + + FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_callbacks_active_t; + +typedef struct kmp_taskdata kmp_taskdata_t; + +#define TASK_TYPE_DETAILS_FORMAT(info) \ + ((info->td_flags.task_serial || info->td_flags.tasking_ser) \ + ? ompt_task_undeferred \ + : 0x0) | \ + ((!(info->td_flags.tiedness)) ? ompt_task_untied : 0x0) | \ + (info->td_flags.final ? ompt_task_final : 0x0) | \ + (info->td_flags.merged_if0 ? 
ompt_task_mergeable : 0x0) typedef struct { ompt_frame_t frame; - void *function; - ompt_task_id_t task_id; + ompt_data_t task_data; + kmp_taskdata_t *scheduling_parent; #if OMP_40_ENABLED int ndeps; ompt_task_dependence_t *deps; @@ -33,32 +52,31 @@ } ompt_task_info_t; typedef struct { - ompt_parallel_id_t parallel_id; - void *microtask; + ompt_data_t parallel_data; + void *master_return_address; } ompt_team_info_t; typedef struct ompt_lw_taskteam_s { ompt_team_info_t ompt_team_info; ompt_task_info_t ompt_task_info; + int heap; struct ompt_lw_taskteam_s *parent; } ompt_lw_taskteam_t; -typedef struct ompt_parallel_info_s { - ompt_task_id_t parent_task_id; /* id of parent task */ - ompt_parallel_id_t parallel_id; /* id of parallel region */ - ompt_frame_t *parent_task_frame; /* frame data of parent task */ - void *parallel_function; /* pointer to outlined function */ -} ompt_parallel_info_t; - typedef struct { - ompt_state_t state; + ompt_data_t thread_data; + ompt_data_t task_data; /* stored here from implicit barrier-begin until + implicit-task-end */ + void *return_address; /* stored here on entry of runtime */ + omp_state_t state; ompt_wait_id_t wait_id; + int ompt_task_yielded; void *idle_frame; } ompt_thread_info_t; -extern ompt_callbacks_t ompt_callbacks; +extern ompt_callbacks_internal_t ompt_callbacks; -#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE +#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL #if USE_FAST_MEMORY #define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate #define KMP_OMPT_DEPS_FREE __kmp_fast_free @@ -66,7 +84,7 @@ #define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc #define KMP_OMPT_DEPS_FREE __kmp_thread_free #endif -#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ +#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_OPTIONAL */ #ifdef __cplusplus extern "C" { @@ -76,7 +94,20 @@ void ompt_post_init(void); void ompt_fini(void); -extern int ompt_enabled; +#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) +#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) + +int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg); + +extern ompt_callbacks_active_t ompt_enabled; + +#if KMP_OS_WINDOWS +#define UNLIKELY(x) (x) +#define OMPT_NOINLINE __declspec(noinline) +#else +#define UNLIKELY(x) __builtin_expect(!!(x), 0) +#define OMPT_NOINLINE __attribute__((noinline)) +#endif #ifdef __cplusplus }; Index: runtime/src/ompt-specific.h =================================================================== --- runtime/src/ompt-specific.h +++ runtime/src/ompt-specific.h @@ -13,42 +13,63 @@ * forward declarations ****************************************************************************/ -void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid); +void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid); void __ompt_thread_assign_wait_id(void *variable); void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr, - int gtid, void *microtask, - ompt_parallel_id_t ompt_pid); + int gtid, ompt_data_t *ompt_pid, void *codeptr); -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr); +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr, + int on_heap); -ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(ompt_thread_t *thr); - -ompt_parallel_id_t __ompt_parallel_id_new(int gtid); -ompt_task_id_t __ompt_task_id_new(int gtid); +void __ompt_lw_taskteam_unlink(ompt_thread_t *thr); ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); -ompt_task_info_t 
*__ompt_get_taskinfo(int depth); - -void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid); +ompt_task_info_t *__ompt_get_task_info_object(int depth); -void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid); +int __ompt_get_parallel_info_internal(int ancestor_level, + ompt_data_t **parallel_data, + int *team_size); -int __ompt_get_parallel_team_size_internal(int ancestor_level); +int __ompt_get_task_info_internal(int ancestor_level, int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, int *thread_num); -ompt_task_id_t __ompt_get_task_id_internal(int depth); +ompt_data_t *__ompt_get_thread_data_internal(); -ompt_frame_t *__ompt_get_task_frame_internal(int depth); +static uint64_t __ompt_get_get_unique_id_internal(); /***************************************************************************** * macros ****************************************************************************/ +#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info)) +#define OMPT_CUR_TASK_DATA(thr) \ + (&(thr->th.th_current_task->ompt_task_info.task_data)) +#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info)) +#define OMPT_CUR_TEAM_DATA(thr) \ + (&(thr->th.th_team->t.ompt_team_info.parallel_data)) + #define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) +inline void *__ompt_load_return_address(int gtid) { + kmp_info_t *thr = __kmp_threads[gtid]; + void *return_address = thr->th.ompt_thread_info.return_address; + thr->th.ompt_thread_info.return_address = NULL; + return return_address; +} + +#define OMPT_STORE_RETURN_ADDRESS(gtid) \ + if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ + !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ + __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ + __builtin_return_address(0) +#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) + //****************************************************************************** // inline functions //****************************************************************************** @@ -62,7 +83,7 @@ return ompt_get_thread_gtid(gtid); } -inline void ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) { +inline void ompt_set_thread_state(ompt_thread_t *thread, omp_state_t state) { thread->th.ompt_thread_info.state = state; } Index: runtime/src/ompt-specific.cpp =================================================================== --- runtime/src/ompt-specific.cpp +++ runtime/src/ompt-specific.cpp @@ -6,39 +6,32 @@ #include "ompt-internal.h" #include "ompt-specific.h" +#if KMP_OS_UNIX +#include +#include +#endif + +#if KMP_OS_WINDOWS +#define THREAD_LOCAL __declspec(thread) +#else +#define THREAD_LOCAL __thread +#endif + //****************************************************************************** // macros //****************************************************************************** -#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0) - -#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info; +#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info #define OMPT_THREAD_ID_BITS 16 -// 2013 08 24 - John Mellor-Crummey -// ideally, a thread should assign its own ids based on thread private data. 
-// however, the way the intel runtime reinitializes thread data structures -// when it creates teams makes it difficult to maintain persistent thread -// data. using a shared variable instead is simple. I leave it to intel to -// sort out how to implement a higher performance version in their runtime. - -// when using fetch_and_add to generate the IDs, there isn't any reason to waste -// bits for thread id. -#if 0 -#define NEXT_ID(id_ptr, tid) \ - ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid)) -#else -#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr)) -#endif - //****************************************************************************** // private operations //****************************************************************************** //---------------------------------------------------------- // traverse the team and task hierarchy -// note: __ompt_get_teaminfo and __ompt_get_taskinfo +// note: __ompt_get_teaminfo and __ompt_get_task_info_object // traverse the hierarchy similarly and need to be // kept consistent //---------------------------------------------------------- @@ -51,7 +44,7 @@ if (team == NULL) return NULL; - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team); + ompt_lw_taskteam_t *next_lwt = LWT_FROM_TEAM(team), *lwt = NULL; while (depth > 0) { // next lightweight team (if any) @@ -61,9 +54,14 @@ // next heavyweight team (if any) after // lightweight teams are exhausted if (!lwt && team) { - team = team->t.t_parent; - if (team) { - lwt = LWT_FROM_TEAM(team); + if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + team = team->t.t_parent; + if (team) { + next_lwt = LWT_FROM_TEAM(team); + } } } @@ -90,13 +88,14 @@ return NULL; } -ompt_task_info_t *__ompt_get_taskinfo(int depth) { +ompt_task_info_t *__ompt_get_task_info_object(int depth) { ompt_task_info_t *info = NULL; kmp_info_t *thr = ompt_get_thread(); if (thr) { kmp_taskdata_t *taskdata = thr->th.th_current_task; - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team); + ompt_lw_taskteam_t *lwt = NULL, + *next_lwt = LWT_FROM_TEAM(taskdata->td_team); while (depth > 0) { // next lightweight team (if any) @@ -106,9 +105,59 @@ // next heavyweight team (if any) after // lightweight teams are exhausted if (!lwt && taskdata) { - taskdata = taskdata->td_parent; - if (taskdata) { - lwt = LWT_FROM_TEAM(taskdata->td_team); + if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + taskdata = taskdata->td_parent; + if (taskdata) { + next_lwt = LWT_FROM_TEAM(taskdata->td_team); + } + } + } + depth--; + } + + if (lwt) { + info = &lwt->ompt_task_info; + } else if (taskdata) { + info = &taskdata->ompt_task_info; + } + } + + return info; +} + +ompt_task_info_t *__ompt_get_scheduling_taskinfo(int depth) { + ompt_task_info_t *info = NULL; + kmp_info_t *thr = ompt_get_thread(); + + if (thr) { + kmp_taskdata_t *taskdata = thr->th.th_current_task; + + ompt_lw_taskteam_t *lwt = NULL, + *next_lwt = LWT_FROM_TEAM(taskdata->td_team); + + while (depth > 0) { + // next lightweight team (if any) + if (lwt) + lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && taskdata) { + // first try scheduling parent (for explicit task scheduling) + if (taskdata->ompt_task_info.scheduling_parent) { + taskdata = taskdata->ompt_task_info.scheduling_parent; + } else if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + // then go for implicit tasks + taskdata = taskdata->td_parent; + if (taskdata) { + next_lwt = 
LWT_FROM_TEAM(taskdata->td_team); + } } } depth--; @@ -132,29 +181,14 @@ // thread support //---------------------------------------------------------- -ompt_parallel_id_t __ompt_thread_id_new() { - static uint64_t ompt_thread_id = 1; - return NEXT_ID(&ompt_thread_id, 0); -} - -void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) { - ompt_callbacks.ompt_callback(ompt_event_thread_begin)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); -} - -void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) { - ompt_callbacks.ompt_callback(ompt_event_thread_end)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); -} - -ompt_thread_id_t __ompt_get_thread_id_internal() { - // FIXME: until we have a better way of assigning ids, use __kmp_get_gtid - // since the return value might be negative, we need to test that before - // assigning it to an ompt_thread_id_t, which is unsigned. - int id = __kmp_get_gtid(); - assert(id >= 0); - - return GTID_TO_OMPT_THREAD_ID(id); +ompt_data_t *__ompt_get_thread_data_internal() { + if (__kmp_get_gtid() >= 0) { + kmp_info_t *thread = ompt_get_thread(); + if (thread == NULL) + return NULL; + return &(thread->th.ompt_thread_info.thread_data); + } + return NULL; } //---------------------------------------------------------- @@ -162,13 +196,12 @@ //---------------------------------------------------------- void __ompt_thread_assign_wait_id(void *variable) { - int gtid = __kmp_gtid_get_specific(); - kmp_info_t *ti = ompt_get_thread_gtid(gtid); + kmp_info_t *ti = ompt_get_thread(); ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable; } -ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) { +omp_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) { kmp_info_t *ti = ompt_get_thread(); if (ti) { @@ -176,46 +209,26 @@ *ompt_wait_id = ti->th.ompt_thread_info.wait_id; return ti->th.ompt_thread_info.state; } - return ompt_state_undefined; -} - -//---------------------------------------------------------- -// idle frame support -//---------------------------------------------------------- - -void *__ompt_get_idle_frame_internal(void) { - kmp_info_t *ti = ompt_get_thread(); - return ti ? ti->th.ompt_thread_info.idle_frame : NULL; + return omp_state_undefined; } //---------------------------------------------------------- // parallel region support //---------------------------------------------------------- -ompt_parallel_id_t __ompt_parallel_id_new(int gtid) { - static uint64_t ompt_parallel_id = 1; - return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0; -} - -void *__ompt_get_parallel_function_internal(int depth) { - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - void *function = info ? info->microtask : NULL; - return function; -} - -ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) { - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - ompt_parallel_id_t id = info ? info->parallel_id : 0; - return id; -} - -int __ompt_get_parallel_team_size_internal(int depth) { - // initialize the return value with the error value. - // if there is a team at the specified depth, the default - // value will be overwritten the size of that team. 
- int size = -1; - (void)__ompt_get_teaminfo(depth, &size); - return size; +int __ompt_get_parallel_info_internal(int ancestor_level, + ompt_data_t **parallel_data, + int *team_size) { + ompt_team_info_t *info; + if (team_size) { + info = __ompt_get_teaminfo(ancestor_level, team_size); + } else { + info = __ompt_get_teaminfo(ancestor_level, NULL); + } + if (parallel_data) { + *parallel_data = info ? &(info->parallel_data) : NULL; + } + return info ? 2 : 0; } //---------------------------------------------------------- @@ -223,60 +236,182 @@ //---------------------------------------------------------- void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, - void *microtask, ompt_parallel_id_t ompt_pid) { - lwt->ompt_team_info.parallel_id = ompt_pid; - lwt->ompt_team_info.microtask = microtask; - lwt->ompt_task_info.task_id = 0; + ompt_data_t *ompt_pid, void *codeptr) { + // initialize parallel_data with input, return address to parallel_data on + // exit + lwt->ompt_team_info.parallel_data = *ompt_pid; + lwt->ompt_team_info.master_return_address = codeptr; + lwt->ompt_task_info.task_data.value = 0; lwt->ompt_task_info.frame.reenter_runtime_frame = NULL; lwt->ompt_task_info.frame.exit_runtime_frame = NULL; - lwt->ompt_task_info.function = NULL; + lwt->ompt_task_info.scheduling_parent = NULL; + lwt->ompt_task_info.deps = NULL; + lwt->ompt_task_info.ndeps = 0; + lwt->heap = 0; lwt->parent = 0; } -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) { - ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info; - lwt->parent = my_parent; - thr->th.th_team->t.ompt_serialized_team_info = lwt; +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, + int on_heap) { + ompt_lw_taskteam_t *link_lwt = lwt; + if (thr->th.th_team->t.t_serialized > + 1) { // we already have a team, so link the new team and swap values + if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap + link_lwt = + (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); + } + link_lwt->heap = on_heap; + + // would be swap in the (on_stack) case. 
+ ompt_team_info_t tmp_team = lwt->ompt_team_info; + link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr); + *OMPT_CUR_TEAM_INFO(thr) = tmp_team; + + ompt_task_info_t tmp_task = lwt->ompt_task_info; + link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr); + *OMPT_CUR_TASK_INFO(thr) = tmp_task; + + // link the taskteam into the list of taskteams: + ompt_lw_taskteam_t *my_parent = + thr->th.th_team->t.ompt_serialized_team_info; + link_lwt->parent = my_parent; + thr->th.th_team->t.ompt_serialized_team_info = link_lwt; + } else { + // this is the first serialized team, so we just store the values in the + // team and drop the taskteam-object + *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info; + *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info; + } } -ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) { +void __ompt_lw_taskteam_unlink(kmp_info_t *thr) { ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; - if (lwtask) + if (lwtask) { thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; - return lwtask; + + ompt_team_info_t tmp_team = lwtask->ompt_team_info; + lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr); + *OMPT_CUR_TEAM_INFO(thr) = tmp_team; + + ompt_task_info_t tmp_task = lwtask->ompt_task_info; + lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr); + *OMPT_CUR_TASK_INFO(thr) = tmp_task; + + if (lwtask->heap) { + __kmp_free(lwtask); + lwtask = NULL; + } + } + // return lwtask; } //---------------------------------------------------------- // task support //---------------------------------------------------------- -ompt_task_id_t __ompt_task_id_new(int gtid) { - static uint64_t ompt_task_id = 1; - return NEXT_ID(&ompt_task_id, gtid); -} +int __ompt_get_task_info_internal(int ancestor_level, int *type, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num) { + if (ancestor_level < 0) + return 0; -ompt_task_id_t __ompt_get_task_id_internal(int depth) { - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_task_id_t task_id = info ? info->task_id : 0; - return task_id; -} + // copied from __ompt_get_scheduling_taskinfo + ompt_task_info_t *info = NULL; + ompt_team_info_t *team_info = NULL; + kmp_info_t *thr = ompt_get_thread(); -void *__ompt_get_task_function_internal(int depth) { - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - void *function = info ? info->function : NULL; - return function; -} + if (thr) { + kmp_taskdata_t *taskdata = thr->th.th_current_task; + if (taskdata == NULL) + return 0; + kmp_team *team = thr->th.th_team; + if (team == NULL) + return 0; + ompt_lw_taskteam_t *lwt = NULL, + *next_lwt = LWT_FROM_TEAM(taskdata->td_team); + + while (ancestor_level > 0) { + // next lightweight team (if any) + if (lwt) + lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && taskdata) { + // first try scheduling parent (for explicit task scheduling) + if (taskdata->ompt_task_info.scheduling_parent) { + taskdata = taskdata->ompt_task_info.scheduling_parent; + } else if (next_lwt) { + lwt = next_lwt; + next_lwt = NULL; + } else { + // then go for implicit tasks + taskdata = taskdata->td_parent; + if (team == NULL) + return 0; + team = team->t.t_parent; + if (taskdata) { + next_lwt = LWT_FROM_TEAM(taskdata->td_team); + } + } + } + ancestor_level--; + } -ompt_frame_t *__ompt_get_task_frame_internal(int depth) { - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_frame_t *frame = info ? 
frame = &info->frame : NULL; - return frame; + if (lwt) { + info = &lwt->ompt_task_info; + team_info = &lwt->ompt_team_info; + if (type) { + *type = ompt_task_implicit; + } + } else if (taskdata) { + info = &taskdata->ompt_task_info; + team_info = &team->t.ompt_team_info; + if (type) { + if (taskdata->td_parent) { + *type = (taskdata->td_flags.tasktype ? ompt_task_explicit + : ompt_task_implicit) | + TASK_TYPE_DETAILS_FORMAT(taskdata); + } else { + *type = ompt_task_initial; + } + } + } + if (task_data) { + *task_data = info ? &info->task_data : NULL; + } + if (task_frame) { + // OpenMP spec asks for the scheduling task to be returned. + *task_frame = info ? &info->frame : NULL; + } + if (parallel_data) { + *parallel_data = team_info ? &(team_info->parallel_data) : NULL; + } + return info ? 2 : 0; + } + return 0; } //---------------------------------------------------------- // team support //---------------------------------------------------------- -void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) { - team->t.ompt_team_info.parallel_id = ompt_pid; +void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid) { + team->t.ompt_team_info.parallel_data = ompt_pid; +} + +//---------------------------------------------------------- +// misc +//---------------------------------------------------------- + +static uint64_t __ompt_get_unique_id_internal() { + static uint64_t thread = 1; + static THREAD_LOCAL uint64_t ID = 0; + if (ID == 0) { + uint64_t new_thread = KMP_TEST_THEN_INC64((kmp_int64 *)&thread); + ID = new_thread << (sizeof(uint64_t) * 8 - OMPT_THREAD_ID_BITS); + } + return ++ID; } Index: runtime/src/z_Linux_util.cpp =================================================================== --- runtime/src/z_Linux_util.cpp +++ runtime/src/z_Linux_util.cpp @@ -2,7 +2,6 @@ * z_Linux_util.cpp -- platform specific routines. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" #include "kmp_i18n.h" @@ -1396,7 +1394,6 @@ } } - /* This routine puts the calling thread to sleep after setting the sleep bit for the indicated flag variable to true. */ template @@ -2283,7 +2280,7 @@ #endif ) { #if OMPT_SUPPORT - *exit_frame_ptr = __builtin_frame_address(0); + *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); #endif switch (argc) { Index: runtime/src/z_Windows_NT-586_util.cpp =================================================================== --- runtime/src/z_Windows_NT-586_util.cpp +++ runtime/src/z_Windows_NT-586_util.cpp @@ -2,7 +2,6 @@ * z_Windows_NT-586_util.cpp -- platform specific routines. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) Index: runtime/src/z_Windows_NT_util.cpp =================================================================== --- runtime/src/z_Windows_NT_util.cpp +++ runtime/src/z_Windows_NT_util.cpp @@ -2,7 +2,6 @@ * z_Windows_NT_util.cpp -- platform specific routines. 
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -12,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "kmp.h" #include "kmp_affinity.h" #include "kmp_i18n.h" Index: runtime/test/CMakeLists.txt =================================================================== --- runtime/test/CMakeLists.txt +++ runtime/test/CMakeLists.txt @@ -34,8 +34,7 @@ pythonize_bool(LIBOMP_USE_HWLOC) pythonize_bool(LIBOMP_OMPT_SUPPORT) -pythonize_bool(LIBOMP_OMPT_BLAME) -pythonize_bool(LIBOMP_OMPT_TRACE) +pythonize_bool(LIBOMP_OMPT_OPTIONAL) pythonize_bool(LIBOMP_HAVE_LIBM) pythonize_bool(LIBOMP_HAVE_LIBATOMIC) Index: runtime/test/lit.cfg =================================================================== --- runtime/test/lit.cfg +++ runtime/test/lit.cfg @@ -92,24 +92,15 @@ # for callback.h config.test_cflags += " -I " + config.test_source_root + "/ompt" -if re.search('clang', config.test_compiler) is not None or re.search('icc', config.test_compiler) is not None: - config.available_features.add("cancel") - config.available_features.add("taskgroup") - config.available_features.add("dependences") - config.available_features.add("flush") - config.available_features.add("taskyield") - config.available_features.add("master_callback") - config.available_features.add("single_callback") +if 'Linux' in config.operating_system: + config.available_features.add("linux") # to run with icc INTEL_LICENSE_FILE must be set if 'INTEL_LICENSE_FILE' in os.environ: config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE'] -# substitutions -if config.has_ompt: - config.substitutions.append(("FileCheck", config.test_filecheck)) - config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable")) +# substitutions config.substitutions.append(("%libomp-compile-and-run", \ "%libomp-compile && %libomp-run")) config.substitutions.append(("%libomp-cxx-compile-and-run", \ @@ -118,9 +109,14 @@ "%clangXX %cflags -std=c++11 %s -o %t" + libs)) config.substitutions.append(("%libomp-compile", \ "%clang %cflags %s -o %t" + libs)) +config.substitutions.append(("%libomp-tool", \ + "%clang %cflags -shared -fPIC -o %T/tool.so" + libs)) config.substitutions.append(("%libomp-run", "%t")) config.substitutions.append(("%clangXX", config.test_cxx_compiler)) config.substitutions.append(("%clang", config.test_compiler)) config.substitutions.append(("%openmp_flag", config.test_openmp_flag)) config.substitutions.append(("%cflags", config.test_cflags)) +if config.has_ompt: + config.substitutions.append(("FileCheck", config.test_filecheck)) + config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable")) Index: runtime/test/lit.site.cfg.in =================================================================== --- runtime/test/lit.site.cfg.in +++ runtime/test/lit.site.cfg.in @@ -11,7 +11,7 @@ config.operating_system = "@CMAKE_SYSTEM_NAME@" config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@" config.using_hwloc = @LIBOMP_USE_HWLOC@ -config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_BLAME@ and @LIBOMP_OMPT_TRACE@ +config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@ config.has_libm = @LIBOMP_HAVE_LIBM@ config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@ Index: runtime/test/ompt/callback.h =================================================================== --- runtime/test/ompt/callback.h +++ runtime/test/ompt/callback.h @@ -1,119 +1,660 @@ +#define _BSD_SOURCE #include 
#include +#include #include +#include "ompt-signal.h" -static ompt_get_task_id_t ompt_get_task_id; -static ompt_get_task_frame_t ompt_get_task_frame; -static ompt_get_thread_id_t ompt_get_thread_id; -static ompt_get_parallel_id_t ompt_get_parallel_id; +static const char* ompt_thread_type_t_values[] = { + NULL, + "ompt_thread_initial", + "ompt_thread_worker", + "ompt_thread_other" +}; + +static const char* ompt_task_status_t_values[] = { + NULL, + "ompt_task_complete", + "ompt_task_yield", + "ompt_task_cancel", + "ompt_task_others" +}; +static const char* ompt_cancel_flag_t_values[] = { + "ompt_cancel_parallel", + "ompt_cancel_sections", + "ompt_cancel_do", + "ompt_cancel_taskgroup", + "ompt_cancel_activated", + "ompt_cancel_detected", + "ompt_cancel_discarded_task" +}; + +static ompt_set_callback_t ompt_set_callback; +static ompt_get_task_info_t ompt_get_task_info; +static ompt_get_thread_data_t ompt_get_thread_data; +static ompt_get_parallel_info_t ompt_get_parallel_info; +static ompt_get_unique_id_t ompt_get_unique_id; +static ompt_get_num_places_t ompt_get_num_places; +static ompt_get_place_proc_ids_t ompt_get_place_proc_ids; +static ompt_get_place_num_t ompt_get_place_num; +static ompt_get_partition_place_nums_t ompt_get_partition_place_nums; +static ompt_get_proc_id_t ompt_get_proc_id; +static ompt_enumerate_states_t ompt_enumerate_states; +static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls; static void print_ids(int level) { - ompt_frame_t* frame = ompt_get_task_frame(level); - printf("%" PRIu64 ": level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_id(), level, ompt_get_parallel_id(level), ompt_get_task_id(level), frame->exit_runtime_frame, frame->reenter_runtime_frame); + ompt_frame_t* frame ; + ompt_data_t* parallel_data; + ompt_data_t* task_data; + int exists_task = ompt_get_task_info(level, NULL, &task_data, &frame, ¶llel_data, NULL); + if (frame) + { + printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame->exit_runtime_frame, frame->reenter_runtime_frame); + } + else + printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", frame=%p\n", ompt_get_thread_data()->value, level, exists_task ? parallel_data->value : 0, exists_task ? task_data->value : 0, frame); } #define print_frame(level)\ do {\ - printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_id(), level, __builtin_frame_address(level));\ + printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", ompt_get_thread_data()->value, level, __builtin_frame_address(level));\ } while(0) +#define print_current_address(id)\ +{} /* Empty block between "#pragma omp ..." and __asm__ statement as a workaround for icc bug */ \ +__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \ +ompt_label_##id:\ + printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, (char*)(&& ompt_label_##id)-1, (char*)(&& ompt_label_##id)-4) + /* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */ + /* for void-type runtime function, the label is after the nop (-1), for functions with return value, there is a mov instruction before the label (-4) */ + +#define print_fuzzy_address(id)\ +{} /* Empty block between "#pragma omp ..." 
and __asm__ statement as a workaround for icc bug */ \ +__asm__("nop"); /* provide an instruction as jump target (compiler would insert an instruction if label is target of a jmp ) */ \ +ompt_label_##id:\ + printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_##id))/256-1, ((uint64_t)(char*)(&& ompt_label_##id))/256) + /* "&& label" returns the address of the label (GNU extension); works with gcc, clang, icc */ + /* for void-type runtime function, the label is after the nop (-1), for functions with return value, there is a mov instruction before the label (-4) */ + +static void format_task_type(int type, char* buffer) +{ + char* progress = buffer; + if(type & ompt_task_initial) progress += sprintf(progress, "ompt_task_initial"); + if(type & ompt_task_implicit) progress += sprintf(progress, "ompt_task_implicit"); + if(type & ompt_task_explicit) progress += sprintf(progress, "ompt_task_explicit"); + if(type & ompt_task_target) progress += sprintf(progress, "ompt_task_target"); + if(type & ompt_task_undeferred) progress += sprintf(progress, "|ompt_task_undeferred"); + if(type & ompt_task_untied) progress += sprintf(progress, "|ompt_task_untied"); + if(type & ompt_task_final) progress += sprintf(progress, "|ompt_task_final"); + if(type & ompt_task_mergeable) progress += sprintf(progress, "|ompt_task_mergeable"); + if(type & ompt_task_merged) progress += sprintf(progress, "|ompt_task_merged"); +} + +static void +on_ompt_callback_mutex_acquire( + ompt_mutex_kind_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_mutex_acquired( + ompt_mutex_kind_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_acquired_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_atomic: + 
printf("%" PRIu64 ": ompt_event_acquired_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_acquired_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_mutex_released( + ompt_mutex_kind_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_release_nest_lock_last: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_critical: + printf("%" PRIu64 ": ompt_event_release_critical: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_atomic: + printf("%" PRIu64 ": ompt_event_release_atomic: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_ordered: + printf("%" PRIu64 ": ompt_event_release_ordered: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } +} + +static void +on_ompt_callback_nest_lock( + ompt_scope_endpoint_t endpoint, + ompt_wait_id_t wait_id, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + } +} + +static void +on_ompt_callback_sync_region( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + print_ids(0); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case 
ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +static void +on_ompt_callback_sync_region_wait( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +static void +on_ompt_callback_flush( + ompt_data_t *thread_data, + const void *codeptr_ra) +{ + printf("%" PRIu64 ": ompt_event_flush: codeptr_ra=%p\n", thread_data->value, codeptr_ra); +} + +static void +on_ompt_callback_cancel( + ompt_data_t *task_data, + int flags, + const void *codeptr_ra) +{ + const char* first_flag_value; + const char* second_flag_value; + if(flags & ompt_cancel_parallel) + first_flag_value = ompt_cancel_flag_t_values[0]; + else if(flags & ompt_cancel_sections) + first_flag_value = ompt_cancel_flag_t_values[1]; + else if(flags & ompt_cancel_do) + first_flag_value = ompt_cancel_flag_t_values[2]; + else if(flags & ompt_cancel_taskgroup) + first_flag_value = ompt_cancel_flag_t_values[3]; + + if(flags & ompt_cancel_activated) + second_flag_value = ompt_cancel_flag_t_values[4]; + else if(flags & ompt_cancel_detected) + second_flag_value = ompt_cancel_flag_t_values[5]; + else if(flags & ompt_cancel_discarded_task) + second_flag_value = ompt_cancel_flag_t_values[6]; + + printf("%" PRIu64 ": ompt_event_cancel: task_data=%" PRIu64 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, task_data->value, first_flag_value, second_flag_value, flags, codeptr_ra); +} static void -on_ompt_event_barrier_begin( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_idle( + ompt_scope_endpoint_t endpoint) { - printf("%" PRIu64 ": 
ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); - print_ids(0); + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_idle_begin:\n", ompt_get_thread_data()->value); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_idle_end:\n", ompt_get_thread_data()->value); + break; + } } static void -on_ompt_event_barrier_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_implicit_task( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int team_size, + unsigned int thread_num) { - printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(endpoint) + { + case ompt_scope_begin: + if(task_data->ptr) + printf("%s\n", "0: task_data initially not null"); + task_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, team_size, thread_num); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", team_size=%" PRIu32 ", thread_num=%" PRIu32 "\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, team_size, thread_num); + break; + } } static void -on_ompt_event_implicit_task_begin( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_lock_init( + ompt_mutex_kind_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_implicit_task_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); + break; + default: + break; + } } static void -on_ompt_event_implicit_task_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_lock_destroy( + ompt_mutex_kind_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(kind) + { + case ompt_mutex_lock: + printf("%" PRIu64 ": ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + case ompt_mutex_nest_lock: + printf("%" PRIu64 ": ompt_event_destroy_nest_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", ompt_get_thread_data()->value, wait_id, codeptr_ra); + break; + default: + break; + } } static void -on_ompt_event_loop_begin( - ompt_parallel_id_t parallel_id, - ompt_task_id_t parent_task_id, - void *workshare_function) +on_ompt_callback_work( + ompt_work_type_t wstype, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + uint64_t count, + const void *codeptr_ra) { - printf("%" PRIu64 ": 
ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", workshare_function=%p\n", ompt_get_thread_id(), parallel_id, parent_task_id, workshare_function); + switch(endpoint) + { + case ompt_scope_begin: + switch(wstype) + { + case ompt_work_loop: + printf("%" PRIu64 ": ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_sections: + printf("%" PRIu64 ": ompt_event_sections_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_executor: + printf("%" PRIu64 ": ompt_event_single_in_block_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_other: + printf("%" PRIu64 ": ompt_event_single_others_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_workshare: + //impl + break; + case ompt_work_distribute: + printf("%" PRIu64 ": ompt_event_distribute_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_taskloop: + //impl + printf("%" PRIu64 ": ompt_event_taskloop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + } + break; + case ompt_scope_end: + switch(wstype) + { + case ompt_work_loop: + printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_sections: + printf("%" PRIu64 ": ompt_event_sections_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_executor: + printf("%" PRIu64 ": ompt_event_single_in_block_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_single_other: + printf("%" PRIu64 ": ompt_event_single_others_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_workshare: + //impl + break; + case ompt_work_distribute: + printf("%" PRIu64 ": ompt_event_distribute_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra, count); + break; + case ompt_work_taskloop: + //impl + printf("%" PRIu64 ": ompt_event_taskloop_end: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", ompt_get_thread_data()->value, 
parallel_data->value, task_data->value, codeptr_ra, count); + break; + } + break; + } } static void -on_ompt_event_loop_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id) +on_ompt_callback_master( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", ompt_get_thread_id(), parallel_id, task_id); + switch(endpoint) + { + case ompt_scope_begin: + printf("%" PRIu64 ": ompt_event_master_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_scope_end: + printf("%" PRIu64 ": ompt_event_master_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } } static void -on_ompt_event_parallel_begin( - ompt_task_id_t parent_task_id, - ompt_frame_t *parent_task_frame, - ompt_parallel_id_t parallel_id, +on_ompt_callback_parallel_begin( + ompt_data_t *parent_task_data, + const ompt_frame_t *parent_task_frame, + ompt_data_t* parallel_data, uint32_t requested_team_size, - void *parallel_function, - ompt_invoker_t invoker) + ompt_invoker_t invoker, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", parallel_function=%p, invoker=%d\n", ompt_get_thread_id(), parent_task_id, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_id, requested_team_size, parallel_function, invoker); + if(parallel_data->ptr) + printf("%s\n", "0: parallel_data initially not null"); + parallel_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_parallel_begin: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, parent_task_data->value, parent_task_frame->exit_runtime_frame, parent_task_frame->reenter_runtime_frame, parallel_data->value, requested_team_size, codeptr_ra, invoker); } static void -on_ompt_event_parallel_end( - ompt_parallel_id_t parallel_id, - ompt_task_id_t task_id, - ompt_invoker_t invoker) +on_ompt_callback_parallel_end( + ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_invoker_t invoker, + const void *codeptr_ra) { - printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d\n", ompt_get_thread_id(), parallel_id, task_id, invoker); + printf("%" PRIu64 ": ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, invoker, codeptr_ra); } +static void +on_ompt_callback_task_create( + ompt_data_t *parent_task_data, /* id of parent task */ + const ompt_frame_t *parent_frame, /* frame data for parent task */ + ompt_data_t* new_task_data, /* id of created task */ + int type, + int has_dependences, + const void *codeptr_ra) /* pointer to outlined function */ +{ + if(new_task_data->ptr) + printf("%s\n", "0: new_task_data initially not null"); + new_task_data->value = ompt_get_unique_id(); + char buffer[2048]; + + format_task_type(type, buffer); -void ompt_initialize( + //there is no 
paralllel_begin callback for implicit parallel region + //thus it is initialized in initial task + if(type & ompt_task_initial) + { + ompt_data_t *parallel_data; + ompt_get_parallel_info(0, ¶llel_data, NULL); + if(parallel_data->ptr) + printf("%s\n", "0: parallel_data initially not null"); + parallel_data->value = ompt_get_unique_id(); + } + + printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, parent_task_data ? parent_task_data->value : 0, parent_frame ? parent_frame->exit_runtime_frame : NULL, parent_frame ? parent_frame->reenter_runtime_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no"); +} + +static void +on_ompt_callback_task_schedule( + ompt_data_t *first_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *second_task_data) +{ + printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status); + if(prior_task_status == ompt_task_complete) + { + printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value); + } +} + +static void +on_ompt_callback_task_dependences( + ompt_data_t *task_data, + const ompt_task_dependence_t *deps, + int ndeps) +{ + printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n", ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps); +} + +static void +on_ompt_callback_task_dependence( + ompt_data_t *first_task_data, + ompt_data_t *second_task_data) +{ + printf("%" PRIu64 ": ompt_event_task_dependence_pair: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value); +} + +static void +on_ompt_callback_thread_begin( + ompt_thread_type_t thread_type, + ompt_data_t *thread_data) +{ + if(thread_data->ptr) + printf("%s\n", "0: thread_data initially not null"); + thread_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value); +} + +static void +on_ompt_callback_thread_end( + ompt_data_t *thread_data) +{ + printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, thread_data->value); +} + +static int +on_ompt_callback_control_tool( + uint64_t command, + uint64_t modifier, + void *arg, + const void *codeptr_ra) +{ + ompt_frame_t* omptTaskFrame; + ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL); + printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_runtime_frame, omptTaskFrame->reenter_runtime_frame); + return 0; //success +} + +#define register_callback_t(name, type) \ +do{ \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \ + ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ +}while(0) + +#define 
register_callback(name) register_callback_t(name, name##_t) + +int ompt_initialize( ompt_function_lookup_t lookup, - const char *runtime_version, - unsigned int ompt_version) + ompt_fns_t* fns) { - ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); - ompt_get_task_id = (ompt_get_task_id_t) lookup("ompt_get_task_id"); - ompt_get_task_frame = (ompt_get_task_frame_t) lookup("ompt_get_task_frame"); - ompt_get_thread_id = (ompt_get_thread_id_t) lookup("ompt_get_thread_id"); - ompt_get_parallel_id = (ompt_get_parallel_id_t) lookup("ompt_get_parallel_id"); + ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); + ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info"); + ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data"); + ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info"); + ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); - ompt_set_callback(ompt_event_barrier_begin, (ompt_callback_t) &on_ompt_event_barrier_begin); - ompt_set_callback(ompt_event_barrier_end, (ompt_callback_t) &on_ompt_event_barrier_end); - ompt_set_callback(ompt_event_implicit_task_begin, (ompt_callback_t) &on_ompt_event_implicit_task_begin); - ompt_set_callback(ompt_event_implicit_task_end, (ompt_callback_t) &on_ompt_event_implicit_task_end); - ompt_set_callback(ompt_event_loop_begin, (ompt_callback_t) &on_ompt_event_loop_begin); - ompt_set_callback(ompt_event_loop_end, (ompt_callback_t) &on_ompt_event_loop_end); - ompt_set_callback(ompt_event_parallel_begin, (ompt_callback_t) &on_ompt_event_parallel_begin); - ompt_set_callback(ompt_event_parallel_end, (ompt_callback_t) &on_ompt_event_parallel_end); - printf("0: NULL_POINTER=%p\n", NULL); + ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places"); + ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids"); + ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num"); + ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums"); + ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id"); + ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states"); + ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls"); + + register_callback(ompt_callback_mutex_acquire); + register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t); + register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t); + register_callback(ompt_callback_nest_lock); + register_callback(ompt_callback_sync_region); + register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t); + register_callback(ompt_callback_control_tool); + register_callback(ompt_callback_flush); + register_callback(ompt_callback_cancel); + register_callback(ompt_callback_idle); + register_callback(ompt_callback_implicit_task); + register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t); + register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t); + register_callback(ompt_callback_work); + register_callback(ompt_callback_master); + register_callback(ompt_callback_parallel_begin); + register_callback(ompt_callback_parallel_end); + register_callback(ompt_callback_task_create); + register_callback(ompt_callback_task_schedule); + register_callback(ompt_callback_task_dependences); + register_callback(ompt_callback_task_dependence); + 
register_callback(ompt_callback_thread_begin); + register_callback(ompt_callback_thread_end); + printf("0: NULL_POINTER=%p\n", (void*)NULL); + return 1; //success +} + +void ompt_finalize(ompt_fns_t* fns) +{ + printf("0: ompt_event_runtime_shutdown\n"); } -ompt_initialize_t ompt_tool() +ompt_fns_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) { - return &ompt_initialize; + static ompt_fns_t ompt_fns = {&ompt_initialize,&ompt_finalize}; + return &ompt_fns; } Index: runtime/test/ompt/cancel/cancel_parallel.c =================================================================== --- /dev/null +++ runtime/test/ompt/cancel/cancel_parallel.c @@ -0,0 +1,42 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Current GOMP interface implementation does not support cancellation +// XFAIL: gcc + +#include "callback.h" +#include "omp.h" + +int main() +{ + #pragma omp parallel num_threads(2) + { + if(omp_get_thread_num() == 0) + { + printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_1))/256-1, ((uint64_t)(char*)(&& ompt_label_1))/256); + #pragma omp cancel parallel + print_fuzzy_address(1); //does not actually print the address but provides a label + } + else + { + usleep(100); + printf("%" PRIu64 ": fuzzy_address=0x%lx or 0x%lx\n", ompt_get_thread_data()->value, ((uint64_t)(char*)(&& ompt_label_2))/256-1, ((uint64_t)(char*)(&& ompt_label_2))/256); + #pragma omp cancellation point parallel + print_fuzzy_address(2); //does not actually print the address but provides a label + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_activated=17, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: fuzzy_address={{.*}}[[OTHER_RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_parallel|ompt_cancel_detected=33, codeptr_ra=[[OTHER_RETURN_ADDRESS]]{{[0-f][0-f]}} + + return 0; +} Index: runtime/test/ompt/cancel/cancel_taskgroup.c =================================================================== --- /dev/null +++ runtime/test/ompt/cancel/cancel_taskgroup.c @@ -0,0 +1,88 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Current GOMP interface implementation does not support cancellation +// XFAIL: gcc + +#include "callback.h" +#include +#include + +int main() +{ + int condition=0; + #pragma omp parallel num_threads(2) + {} + + print_frame(0); + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp taskgroup + { + #pragma omp task shared(condition) + { + printf("start execute task 1\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation 
point taskgroup + printf("end execute task 1\n"); + } + #pragma omp task shared(condition) + { + printf("start execute task 2\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 2\n"); + } + #pragma omp task shared(condition) + { + printf("start execute task 3\n"); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + #pragma omp cancellation point taskgroup + printf("end execute task 3\n"); + } + #pragma omp task if(0) shared(condition) + { + printf("start execute task 4\n"); + OMPT_WAIT(condition,1); + #pragma omp cancel taskgroup + printf("end execute task 4\n"); + } + OMPT_SIGNAL(condition); + } + } + #pragma omp barrier + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[FIRST_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[SECOND_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3 + + // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]] + // CHECK-DAG: {{^}}{{[0-9]+}}: ompt_event_cancel: task_data={{[0-9]+}}, flags=ompt_cancel_taskgroup|ompt_cancel_discarded_task=72, codeptr_ra=[[NULL]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_cancel: task_data={{[0-9]+}}, 
flags=ompt_cancel_taskgroup|ompt_cancel_detected=40, codeptr_ra={{0x[0-f]*}} + + return 0; +} Index: runtime/test/ompt/cancel/cancel_worksharing.c =================================================================== --- /dev/null +++ runtime/test/ompt/cancel/cancel_worksharing.c @@ -0,0 +1,68 @@ +// RUN: %libomp-compile && env OMP_CANCELLATION=true %libomp-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Current GOMP interface implementation does not support cancellation +// XFAIL: gcc + + +#include "callback.h" +#include + +int main() +{ + int condition=0; + #pragma omp parallel num_threads(2) + { + int x = 0; + int i; + #pragma omp for + for(i = 0; i < 2; i++) + { + if(i == 0) + { + x++; + OMPT_SIGNAL(condition); + #pragma omp cancel for + } + else + { + x++; + OMPT_WAIT(condition,1); + usleep(10000); + #pragma omp cancellation point for + } + } + } + #pragma omp parallel num_threads(2) + { + #pragma omp sections + { + #pragma omp section + { + OMPT_SIGNAL(condition); + #pragma omp cancel sections + } + #pragma omp section + { + OMPT_WAIT(condition,2); + usleep(10000); + #pragma omp cancellation point sections + } + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + + // cancel for and sections + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_activated=20, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_activated=18, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_do|ompt_cancel_detected=36, codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[OTHER_THREAD_ID:[0-9]+]]: ompt_event_cancel: task_data=[[TASK_ID:[0-9]+]], flags=ompt_cancel_sections|ompt_cancel_detected=34, codeptr_ra={{0x[0-f]*}} + + return 0; +} Index: runtime/test/ompt/loadtool/tool_available.c =================================================================== --- /dev/null +++ runtime/test/ompt/loadtool/tool_available.c @@ -0,0 +1,59 @@ +// RUN: %libomp-compile -DCODE && %libomp-compile -DTOOL -o%T/tool.so -shared -fPIC && env OMP_TOOL_LIBRARIES=%T/tool.so %libomp-run | FileCheck %s +// REQUIRES: ompt + +/* + * This file contains code for an OMPT shared library tool to be + * loaded and the code for the OpenMP executable. + * -DTOOL enables the code for the tool during compilation + * -DCODE enables the code for the executable during compilation + * The RUN line compiles the two binaries and then tries to load + * the tool using the OMP_TOOL_LIBRARIES environmental variable. + */ + +#ifdef CODE +#include "omp.h" + +int main() +{ + #pragma omp parallel num_threads(2) + { + } + + + // Check if libomp supports the callbacks for this test. 
+  // CHECK-NOT: {{^}}0: Could not register callback
+
+  // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}0: ompt_event_runtime_shutdown
+
+  return 0;
+}
+
+#endif /* CODE */
+
+#ifdef TOOL
+
+#include <ompt.h>
+#include <stdio.h>
+
+int ompt_initialize(
+  ompt_function_lookup_t lookup,
+  ompt_fns_t* fns)
+{
+  printf("0: NULL_POINTER=%p\n", (void*)NULL);
+  return 1; //success
+}
+
+void ompt_finalize(ompt_fns_t* fns)
+{
+  printf("%d: ompt_event_runtime_shutdown\n", omp_get_thread_num());
+}
+
+ompt_fns_t* ompt_start_tool(
+  unsigned int omp_version,
+  const char *runtime_version)
+{
+  static ompt_fns_t ompt_fns = {&ompt_initialize,&ompt_finalize};
+  return &ompt_fns;
+}
+#endif /* TOOL */
Index: runtime/test/ompt/misc/api_calls.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/misc/api_calls.c
@@ -0,0 +1,66 @@
+// RUN: %libomp-compile && env OMP_PLACES=cores %libomp-run | FileCheck %s
+// REQUIRES: ompt, linux
+#include "callback.h"
+#include <omp.h>
+#define __USE_GNU
+#include <sched.h>
+#undef __USE_GNU
+
+void print_list(char* function_name, int list[])
+{
+  printf("%" PRIu64 ": %s(0)=(%d", ompt_get_thread_data()->value, function_name, list[0]);
+  int i;
+  for(i = 1; i < omp_get_place_num_procs(0); i++)
+  {
+    printf(",%d", list[i]);
+  }
+  printf(")\n");
+}
+
+int main()
+{
+  #pragma omp parallel num_threads(1)
+  {
+    printf("%" PRIu64 ": omp_get_num_places()=%d\n", ompt_get_thread_data()->value, omp_get_num_places());
+    printf("%" PRIu64 ": ompt_get_num_places()=%d\n", ompt_get_thread_data()->value, ompt_get_num_places());
+
+    int omp_ids[omp_get_place_num_procs(0)];
+    omp_get_place_proc_ids(0, omp_ids);
+    print_list("omp_get_place_proc_ids" ,omp_ids);
+    int ompt_ids[omp_get_place_num_procs(0)];
+    ompt_get_place_proc_ids(0, omp_get_place_num_procs(0), ompt_ids);
+    print_list("ompt_get_place_proc_ids", ompt_ids);
+
+    printf("%" PRIu64 ": omp_get_place_num()=%d\n", ompt_get_thread_data()->value, omp_get_place_num());
+    printf("%" PRIu64 ": ompt_get_place_num()=%d\n", ompt_get_thread_data()->value, ompt_get_place_num());
+
+    int omp_nums[omp_get_partition_num_places()];
+    omp_get_partition_place_nums(omp_nums);
+    print_list("omp_get_partition_place_nums" ,omp_nums);
+    int ompt_nums[omp_get_partition_num_places()];
+    ompt_get_partition_place_nums(omp_get_partition_num_places(), ompt_nums);
+    print_list("ompt_get_partition_place_nums", ompt_nums);
+
+    printf("%" PRIu64 ": sched_getcpu()=%d\n", ompt_get_thread_data()->value, sched_getcpu());
+    printf("%" PRIu64 ": ompt_get_proc_id()=%d\n", ompt_get_thread_data()->value, ompt_get_proc_id());
+  }
+
+  // Check if libomp supports the callbacks for this test.
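Stepping back from the diff for a moment: tool_available.c above exercises the new loading path in which the runtime looks for an ompt_start_tool entry point, here provided by a shared library named in OMP_TOOL_LIBRARIES. The same kind of tool can be tried by hand outside the lit harness; the sketch below is illustrative only (the file name, compiler invocation and printed strings are assumptions, not part of this patch), but it follows the same ompt_fns_t skeleton as the TOOL half of the test.

/* my_tool.c -- hypothetical standalone variant of the TOOL half above.
 * Assumed build/run steps, analogous to the test's RUN line:
 *   clang -shared -fPIC my_tool.c -o my_tool.so
 *   OMP_TOOL_LIBRARIES=./my_tool.so ./my_openmp_app
 */
#include <ompt.h>
#include <stdio.h>

static int my_initialize(ompt_function_lookup_t lookup, ompt_fns_t *fns) {
  /* A real tool would use lookup("ompt_set_callback") here to register
   * event callbacks; returning non-zero keeps the tool active. */
  printf("my_tool: initialized\n");
  return 1;
}

static void my_finalize(ompt_fns_t *fns) {
  printf("my_tool: finalized at runtime shutdown\n");
}

/* The runtime resolves this symbol (for example from each library listed in
 * OMP_TOOL_LIBRARIES) and keeps the first tool that returns a non-NULL
 * ompt_fns_t. */
ompt_fns_t *ompt_start_tool(unsigned int omp_version,
                            const char *runtime_version) {
  static ompt_fns_t fns = {&my_initialize, &my_finalize};
  return &fns;
}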
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: omp_get_num_places()=[[NUM_PLACES:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_num_places()=[[NUM_PLACES]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_proc_ids(0)=([[PROC_IDS:[0-9\,]+]])
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_proc_ids(0)=([[PROC_IDS]])
+
+  // CHECK: {{^}}[[MASTER_ID]]: omp_get_place_num()=[[PLACE_NUM:[-]?[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_place_num()=[[PLACE_NUM]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: sched_getcpu()=[[CPU_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_get_proc_id()=[[CPU_ID]]
+
+
+  return 0;
+}
Index: runtime/test/ompt/misc/control_tool.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/misc/control_tool.c
@@ -0,0 +1,27 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel num_threads(1)
+  {
+    print_frame(1);
+    print_frame(0);
+    omp_control_tool(omp_control_tool_flush, 1, NULL);
+    print_current_address(0);
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_control_tool'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(1)=[[EXIT_FRAME:0x[0-f]*]]
+  // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
Index: runtime/test/ompt/misc/control_tool_no_ompt_support.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/misc/control_tool_no_ompt_support.c
@@ -0,0 +1,12 @@
+// RUN: %libomp-compile-and-run
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel num_threads(1)
+  {
+    omp_control_tool(omp_control_tool_flush, 1, NULL);
+  }
+
+  return 0;
+}
Index: runtime/test/ompt/misc/idle.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/misc/idle.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+  #pragma omp parallel num_threads(3)
+  {
+    #pragma omp atomic
+    x++;
+  }
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp atomic
+    x++;
+  }
+
+
+  printf("x=%d\n", x);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_idle_begin:
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_idle_end:
+
+  return 0;
+}
Index: runtime/test/ompt/ompt-signal.h
===================================================================
--- /dev/null
+++ runtime/test/ompt/ompt-signal.h
@@ -0,0 +1,24 @@
+// These functions are used to provide a signal-wait mechanism to enforce expected scheduling for the test cases.
+// Conditional variable (s) needs to be shared!
Initialize to 0 +#include + +#define OMPT_SIGNAL(s) ompt_signal(&s) +//inline +void ompt_signal(int* s) +{ + #pragma omp atomic + (*s)++; +} + +#define OMPT_WAIT(s,v) ompt_wait(&s,v) +// wait for s >= v +//inline +void ompt_wait(int *s, int v) +{ + int wait=0; + do{ + usleep(10); + #pragma omp atomic read + wait = (*s); + }while(wait + +int main() +{ + omp_set_nested(1); + omp_set_max_active_levels(1); + + #pragma omp parallel num_threads(2) + { + print_ids(0); + print_ids(1); + #pragma omp parallel num_threads(2) + { + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], 
invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} Index: runtime/test/ompt/parallel/nested.c =================================================================== --- runtime/test/ompt/parallel/nested.c +++ runtime/test/ompt/parallel/nested.c @@ -3,9 +3,11 @@ // REQUIRES: ompt #include "callback.h" #include +#include int main() { + int condition=0; omp_set_nested(1); print_frame(0); @@ -15,6 +17,10 @@ print_ids(0); print_ids(1); print_frame(0); + + //get all implicit task events before starting nested: + #pragma omp barrier + #pragma omp parallel num_threads(4) { print_frame(1); @@ -22,17 +28,38 @@ print_ids(1); print_ids(2); print_frame(0); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,16); #pragma omp barrier + print_fuzzy_address(1); print_ids(0); } + print_fuzzy_address(2); print_ids(0); } + print_fuzzy_address(3); + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
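The explicit barrier and the OMPT_SIGNAL/OMPT_WAIT(condition,16) handshake added to nested.c above are what make these checks deterministic: each nested worker announces itself and then spins until all 4x4 implicit tasks have arrived, so the per-thread THREADS sections can assume that all 16 workers were alive at the same time. A minimal standalone sketch of the same pattern, mirroring the OMPT_SIGNAL/OMPT_WAIT macros from ompt-signal.h (illustration only, not part of the patch; it assumes the runtime really provides 4x4 threads, otherwise the spin would not terminate):

#include <omp.h>
#include <stdio.h>
#include <unistd.h>

int main(void) {
  int condition = 0;                     /* shared counter, initialized to 0 */
  omp_set_nested(1);
  #pragma omp parallel num_threads(4)
  {
    #pragma omp parallel num_threads(4)
    {
      /* OMPT_SIGNAL(condition): announce this nested worker. */
      #pragma omp atomic
      condition++;
      /* OMPT_WAIT(condition, 16): spin until all 4x4 workers have signaled. */
      int seen = 0;
      do {
        usleep(10);
        #pragma omp atomic read
        seen = condition;
      } while (seen < 16);
    }
  }
  printf("all 16 nested workers were running concurrently\n");
  return 0;
}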
@@ -46,219 +73,224 @@ // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]] // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[NESTED_EXIT:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] + // 
THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // explicit barrier - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] // implicit barrier - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], 
exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // implicit barrier - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: 
{{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], 
task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // nested parallel worker threads // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/parallel/nested_lwt.c =================================================================== --- runtime/test/ompt/parallel/nested_lwt.c +++ runtime/test/ompt/parallel/nested_lwt.c @@ -3,35 +3,59 @@ // REQUIRES: ompt #include "callback.h" #include +#include int main() { omp_set_nested(1); + int condition; #pragma omp parallel num_threads(4) { print_ids(0); print_ids(1); + //get all implicit task events before starting nested: + #pragma omp barrier #pragma omp parallel num_threads(1) { print_ids(0); print_ids(1); print_ids(2); + //get all implicit task events before starting nested: + #pragma omp barrier #pragma omp parallel num_threads(4) { print_ids(0); print_ids(1); print_ids(2); print_ids(3); + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,16); } + print_fuzzy_address(1); } + print_fuzzy_address(2); } + print_fuzzy_address(3); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
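The codeptr_ra checks in this file and in nested.c capture the reported code pointer with its last two hex digits stripped (the [[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} pattern in the following hunk) and later only require that this prefix shows up in a fuzzy_address line. The runtime reports a return address somewhere inside the generated call sequence, so an exact textual match against an address taken in the test is not possible; print_fuzzy_address from callback.h therefore prints a small set of candidate addresses around a location recorded in the test (roughly, the enclosing 256-byte blocks). A hypothetical sketch of the idea, not the actual callback.h implementation:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: print the 256-byte block containing a recorded
 * address plus the following block, so that a runtime-reported codeptr_ra
 * truncated by two hex digits matches one of them as a textual prefix. */
static void print_possible_return_addresses(void *addr) {
  uintptr_t block = (uintptr_t)addr & ~(uintptr_t)0xff;
  printf("fuzzy_address=0x%" PRIxPTR " or 0x%" PRIxPTR "\n",
         block, block + 0x100);
}

int main(void) {
  /* In the tests the recorded address comes from the call site region; here
   * we only demonstrate the output format the checks match against. */
  print_possible_return_addresses((void *)&print_possible_return_addresses);
  return 0;
}

Against output such as fuzzy_address=0x400700 or 0x400800, a pattern like fuzzy_address={{.*}}[[RETURN_ADDRESS]] succeeds when RETURN_ADDRESS captured, say, 0x4007 from codeptr_ra=0x400723.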
@@ -48,251 +72,261 @@ // THREADS: 0: NULL_POINTER=[[NULL:.*$]] - // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: 
ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[MASTER_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end - // 
THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: 
{{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], 
task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, 
invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit=[[NESTED_NESTED_TASK_FRAME_EXIT]], parent_task_frame.reenter=[[NESTED_NESTED_TASK_FRAME_ENTER:0x[0-f]+]], parallel_id=[[NESTED_NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_TASK_FRAME_ENTER]] - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_NESTED_TASK_FRAME_EXIT:0x[0-f]+]], reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_NESTED_TASK_FRAME_EXIT]], reenter_frame=[[NESTED_NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NESTED_TASK_FRAME_EXIT]], 
reenter_frame=[[NESTED_TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // nested parallel worker threads // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // can't reliably tell which parallel region is the parent... 
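For the workers of the innermost teams, these checks pin down only task level 0 and task level 3 and accept arbitrary IDs at levels 1 and 2, because which outer team a worker was recruited into is not deterministic. A sketch of how a print_ids-style ancestor walk might produce the "task level N" lines, assuming the harness has bound ompt_get_task_info and ompt_get_thread_data through the lookup callback (the function name print_task_level is hypothetical and the entry-point signatures follow the draft interface this patch targets, so details may differ):

    // Illustrative ancestor walk, not the actual callback.h implementation.
    static void print_task_level(int level) {
      ompt_data_t *task_data = NULL, *parallel_data = NULL;
      // ompt_get_task_info returns 0 when no task exists at this ancestor level.
      if (ompt_get_task_info(level, NULL, &task_data, NULL, &parallel_data, NULL))
        printf("%llu: task level %d: parallel_id=%llu, task_id=%llu\n",
               (unsigned long long)ompt_get_thread_data()->value, level,
               (unsigned long long)(parallel_data ? parallel_data->value : 0),
               (unsigned long long)task_data->value);
    }

At levels 1 and 2 the returned parallel and task IDs depend on thread placement, which is why the expectations there use {{[0-9]+}} wildcards.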
- // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} - // THREADS: {{^}}[[THREAD_ID]]: level 3: parallel_id=0, task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id={{[0-9]+}}, task_id={{[0-9]+}} + // THREADS: {{^}}[[THREAD_ID]]: task level 3: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[TASK_FRAME_ENTER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; Index: runtime/test/ompt/parallel/nested_serialized.c =================================================================== --- runtime/test/ompt/parallel/nested_serialized.c +++ runtime/test/ompt/parallel/nested_serialized.c @@ -18,13 +18,29 @@ print_ids(1); print_ids(2); } + print_fuzzy_address(1); } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end 
before parallel_end! @@ -41,67 +57,71 @@ // THREADS: 0: NULL_POINTER=[[NULL:.*$]] - // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION:0x[0-f]+]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: 
parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: 
ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, 
parallel_function=[[NESTED_PARALLEL_FUNCTION]], invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 2: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[THREAD_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/parallel/no_thread_num_clause.c =================================================================== --- /dev/null +++ runtime/test/ompt/parallel/no_thread_num_clause.c @@ -0,0 +1,95 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +#include "callback.h" + +int main() +{ + omp_set_num_threads(4); + #pragma omp parallel + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=(nil), parent_task_frame.reenter=(nil), new_task_id=281474976710658, codeptr_ra=(nil), task_type=ompt_task_initial=1, has_dependences=no + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}} + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: 
parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} Index: runtime/test/ompt/parallel/normal.c =================================================================== --- runtime/test/ompt/parallel/normal.c +++ runtime/test/ompt/parallel/normal.c @@ 
-10,12 +10,27 @@ print_ids(0); print_ids(1); } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
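The fuzzy-address idiom introduced above deserves a short note. In the parallel_begin line, [[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} captures the reported codeptr_ra with its last two hex digits stripped, and the later fuzzy_address={{.*}}[[RETURN_ADDRESS]] line only asks for that prefix to reappear. The comparison therefore tolerates a small offset, anything inside the same 256-byte window, between the address the runtime reports and the address print_fuzzy_address can take after the region. A minimal sketch of how such a helper could print a maskable address; callback.h is not part of this patch, so the name and output format below are illustrative assumptions, not the real implementation:

  #include <stdio.h>
  #include <inttypes.h>

  // Illustrative only: print an address with its last two hex digits dropped,
  // so FileCheck can match it against a codeptr_ra a few bytes away inside
  // the same 256-byte window.
  static void print_fuzzy_address_sketch(void *addr) {
    printf("fuzzy_address=0x%" PRIxPTR "\n", (uintptr_t)addr >> 8);
  }

  int main(void) {
    int anchor = 0;
    print_fuzzy_address_sketch(&anchor);
    return 0;
  }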
@@ -28,43 +43,48 @@ // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // THREADS: 0: NULL_POINTER=[[NULL:.*$]] - // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}} + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{.*}} // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: 
task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // THREADS: {{^}}[[THREAD_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: 
ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] return 0; } Index: runtime/test/ompt/parallel/not_enough_threads.c =================================================================== --- /dev/null +++ runtime/test/ompt/parallel/not_enough_threads.c @@ -0,0 +1,76 @@ +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | FileCheck %s +// RUN: %libomp-compile && env OMP_THREAD_LIMIT=2 %libomp-run | %sort-threads | FileCheck --check-prefix=THREADS %s +// REQUIRES: ompt +#include "callback.h" + +int main() +{ + #pragma omp parallel num_threads(4) + { + print_ids(0); + print_ids(1); + } + print_fuzzy_address(1); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // Note that we cannot ensure that the worker threads have already called barrier_end and implicit_task_end before parallel_end! 
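That ordering caveat is why the worker-side records are matched with CHECK-DAG below: the master may emit ompt_event_parallel_end as soon as its own share of the join barrier is done, while a worker can still be producing its barrier_end and implicit_task_end lines. A small stand-alone illustration of the timing, plain OpenMP with no OMPT involved, where the printed text is only a stand-in for the records above:

  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    #pragma omp parallel num_threads(4)
    {
      // each implicit task would emit its implicit_task_begin/_end records here
      printf("%d: inside the region\n", omp_get_thread_num());
    }
    // The master reaches this point (where parallel_end is reported) as soon
    // as its own part of the join barrier is done; workers may still be
    // printing, so their lines can appear after the master's parallel_end.
    printf("master: after the region\n");
    return 0;
  }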
+ + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK-DAG: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK-DAG: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + + + // THREADS: 0: NULL_POINTER=[[NULL:.*$]] + // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_initial=1, thread_id=[[MASTER_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}} + + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // THREADS: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_thread_begin: thread_type=ompt_thread_worker=2, thread_id=[[THREAD_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // THREADS: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]] + // THREADS-NOT: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // THREADS: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + return 0; +} Index: runtime/test/ompt/parallel/parallel_if0.c =================================================================== --- /dev/null +++ runtime/test/ompt/parallel/parallel_if0.c @@ -0,0 +1,75 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include 
"callback.h" + +int main() +{ +// print_frame(0); + #pragma omp parallel if(0) + { +// print_frame(1); + print_ids(0); + print_ids(1); +// print_frame(0); + #pragma omp parallel if(0) + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); +// print_frame(0); + #pragma omp task + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_ids(3); + } + } + print_fuzzy_address(1); + } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // 
CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]] + + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[NESTED_RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/parallel/serialized.c =================================================================== --- runtime/test/ompt/parallel/serialized.c +++ runtime/test/ompt/parallel/serialized.c @@ -4,21 +4,73 @@ int main() { +// print_frame(0); #pragma omp parallel num_threads(1) { +// print_frame(1); print_ids(0); print_ids(1); +// print_frame(0); + #pragma omp parallel num_threads(1) + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); +// print_frame(0); + #pragma omp task + { +// print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + print_ids(3); + } + } + print_fuzzy_address(1); } + print_fuzzy_address(2); + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_event_implicit_task_end' // CHECK: 0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:.+]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: parallel_data initially not null + // CHECK-NOT: 0: task_data initially not null + // CHECK-NOT: 0: thread_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[OUTER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: level 1: parallel_id=0, task_id=[[PARENT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=[[INNER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], 
task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[EXPLICIT_TASK_ID]], second_task_id=[[NESTED_IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[EXPLICIT_TASK_ID]] + + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[NESTED_IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[INNER_RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[INNER_RETURN_ADDRESS]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=0, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], invoker=[[PARALLEL_INVOKER]], codeptr_ra=[[OUTER_RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[OUTER_RETURN_ADDRESS]] return 0; } Index: runtime/test/ompt/synchronization/barrier/explicit.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/explicit.c @@ -0,0 +1,57 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + + #pragma omp barrier + print_current_address(); + + #pragma omp atomic + x++; + } + + + // Check if libomp supports the callbacks for this test. 
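The sync-region checks below encode a fixed nesting per thread: every barrier is reported as barrier_begin, then wait_barrier_begin, then wait_barrier_end, then barrier_end, each carrying the same codeptr_ra (or a null codeptr_ra for the workers' implicit barrier at the end of the parallel region). A hedged sketch of a checker for that nesting; the on_* names and argument-free signatures are simplified stand-ins, not the real ompt_callback_sync_region interface:

  #include <assert.h>

  // Per-thread state tracking the expected nesting of the sync-region events
  // (C11 _Thread_local; illustrative only).
  static _Thread_local enum { OUTSIDE, IN_BARRIER, IN_WAIT } barrier_state = OUTSIDE;

  static void on_barrier_begin(void)      { assert(barrier_state == OUTSIDE);    barrier_state = IN_BARRIER; }
  static void on_wait_barrier_begin(void) { assert(barrier_state == IN_BARRIER); barrier_state = IN_WAIT; }
  static void on_wait_barrier_end(void)   { assert(barrier_state == IN_WAIT);    barrier_state = IN_BARRIER; }
  static void on_barrier_end(void)        { assert(barrier_state == IN_BARRIER); barrier_state = OUTSIDE; }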
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread explicit barrier + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}} + + + // worker thread explicit barrier + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/for_loop.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/for_loop.c @@ -0,0 +1,55 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int y[] = {0,1,2,3}; + + #pragma omp parallel num_threads(2) + { + //implicit barrier at end of for loop + int i; + #pragma omp for + for (i = 0; i < 4; i++) + { + y[i]++; + } + print_current_address(); + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at loop end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread explicit barrier + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // worker thread implicit barrier after parallel + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/for_simd.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/for_simd.c @@ -0,0 +1,32 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include + +int main() +{ + int y[] = {0,1,2,3}; + + int i; + #pragma omp for simd + for (i = 0; i < 4; i++) + { + y[i]++; + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at simd loop end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/parallel_region.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/parallel_region.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + //implicit barrier at end of a parallel region + #pragma omp parallel num_threads(2) + { + #pragma omp atomic + x++; + } + print_fuzzy_address(); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/sections.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/sections.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + //implicit barrier after sections with nowait but with lastprivates + //implicit barrier at end of sections + #pragma omp sections + { + #pragma omp section + { + #pragma omp atomic + x++; + } + + #pragma omp section + { + #pragma omp atomic + 
x++; + } + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at sections end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread implicit barrier at sections end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/barrier/single.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/single.c @@ -0,0 +1,60 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + + #pragma omp parallel num_threads(2) + { + //implicit barrier at end of single + #pragma omp single + { + x++; + } + print_fuzzy_address(); + //critical section to avoid merge of two barriers into one + #pragma omp critical + { + x++; + } + } + + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at single end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + + + // worker thread implicit barrier at single end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]]{{[0-f][0-f]}} + // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[NULL]] + + return 0; +} Index: runtime/test/ompt/synchronization/critical.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/critical.c @@ -0,0 +1,31 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + #pragma omp critical + { + print_current_address(1); + print_ids(0); + } + print_current_address(2); + + + // Check if libomp supports the callbacks for this test. 
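Unlike the fuzzy parallel-region checks, the checks for the critical construct below expect an exact match: print_current_address() placed directly after (or as the first statement inside) the construct takes the address of the very next piece of code, which normally coincides with the return address the runtime reports as codeptr_ra for the corresponding enter/exit call. One way such a helper could be written, using the GNU C labels-as-values extension; callback.h is not shown in this patch, so this is an assumption about its approach, not a copy of it:

  #include <stdio.h>

  // Illustrative only: take the address of the code immediately following the
  // preceding runtime call and print it for FileCheck to compare against
  // codeptr_ra.
  #define print_current_address_sketch(id)                         \
    do {                                                           \
      ompt_label_##id:                                             \
        printf("current_address=%p\n", &&ompt_label_##id);         \
    } while (0)

  int main(void) {
    print_current_address_sketch(1); // prints an address inside main
    return 0;
  }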
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_critical: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_critical: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
+
+  return 0;
+}
Index: runtime/test/ompt/synchronization/flush.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/synchronization/flush.c
@@ -0,0 +1,32 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the flush construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel num_threads(2)
+  {
+    int tid = omp_get_thread_num();
+
+    #pragma omp flush
+    print_current_address(1);
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_flush'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: current_address=[[RETURN_ADDRESS]]
+  //
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_flush: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: current_address=[[RETURN_ADDRESS]]
+
+
+
+  return 0;
+}
Index: runtime/test/ompt/synchronization/lock.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/synchronization/lock.c
@@ -0,0 +1,44 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  //need to use an OpenMP construct so that OMPT will be initialized
+  #pragma omp parallel num_threads(1)
+    print_ids(0);
+
+  omp_lock_t lock;
+  printf("%" PRIu64 ": &lock: %lli\n", ompt_get_thread_data()->value, (long long) &lock);
+  omp_init_lock(&lock);
+  print_current_address(1);
+  omp_set_lock(&lock);
+  print_current_address(2);
+  omp_unset_lock(&lock);
+  print_current_address(3);
+  omp_destroy_lock(&lock);
+  print_current_address(4);
+
+
+  // Check if libomp supports the callbacks for this test.
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: &lock: [[WAIT_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_init_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/master.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/master.c @@ -0,0 +1,36 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +// GCC generates code that does not call the runtime for the master construct +// XFAIL: gcc + +#include "callback.h" +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + print_fuzzy_address(1); + x++; + } + print_current_address(2); + } + + printf("%" PRIu64 ": x=%d\n", ompt_get_thread_data()->value, x); + + // Check if libomp supports the callbacks for this test. 
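+ // master_begin/master_end come from ompt_callback_master; the begin address is only compared fuzzily (print_fuzzy_address) since the recorded return address can lie a few bytes past the label.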
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_master_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_master_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS_END:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: current_address=[[RETURN_ADDRESS_END]] + + + return 0; +} Index: runtime/test/ompt/synchronization/nest_lock.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/nest_lock.c @@ -0,0 +1,52 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + // need to use an OpenMP construct so that OMPT will be initialized + #pragma omp parallel num_threads(1) + print_ids(0); + + omp_nest_lock_t nest_lock; + printf("%" PRIu64 ": &nest_lock: %lli\n", ompt_get_thread_data()->value, (long long) &nest_lock); + omp_init_nest_lock(&nest_lock); + print_current_address(1); + omp_set_nest_lock(&nest_lock); + print_current_address(2); + omp_set_nest_lock(&nest_lock); + print_current_address(3); + omp_unset_nest_lock(&nest_lock); + print_current_address(4); + omp_unset_nest_lock(&nest_lock); + print_current_address(5); + omp_destroy_nest_lock(&nest_lock); + print_current_address(6); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + +
return 0; +} Index: runtime/test/ompt/synchronization/ordered.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/ordered.c @@ -0,0 +1,31 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + #pragma omp ordered + { + print_current_address(1); + print_ids(0); + } + print_current_address(2); + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_ordered: wait_id=[[WAIT_ID:[0-9]+]], hint={{[0-9]+}}, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_ordered: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/taskgroup.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/taskgroup.c @@ -0,0 +1,48 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include +#include + +int main() +{ + int condition=0; + int x=0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp taskgroup + { + print_current_address(1); + #pragma omp task + { + #pragma omp atomic + x++; + } + } + print_current_address(2); + } + } + + + // Check if libomp supports the callbacks for this test. 
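+ // Expect taskgroup_begin/wait_taskgroup_begin when the master thread enters the taskgroup and the matching *_end events once the child task has completed; begin and end codeptr_ra are checked against the two printed addresses.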
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_master' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_cancel' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_thread_begin' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskgroup_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskgroup_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/taskwait.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/taskwait.c @@ -0,0 +1,35 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp task + { + x++; + } + #pragma omp taskwait + print_current_address(1); + } + } + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_begin: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: ompt_event_taskwait_end: parallel_id={{[0-9]+}}, task_id={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/test_lock.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/test_lock.c @@ -0,0 +1,54 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include + +int main() +{ + omp_lock_t lock; + omp_init_lock(&lock); + print_current_address(1); + + omp_test_lock(&lock); + print_current_address(2); + omp_unset_lock(&lock); + print_current_address(3); + + omp_set_lock(&lock); + print_current_address(4); + omp_test_lock(&lock); + print_current_address(5); + omp_unset_lock(&lock); + print_current_address(6); + + omp_destroy_lock(&lock); + print_current_address(7); + + // Check if libomp supports the callbacks for this test. 
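+ // omp_test_lock on an uncontended lock behaves like omp_set_lock (wait_lock followed by acquired_lock); the second omp_test_lock is issued while the lock is already held, so only wait_lock is expected, without an acquired event.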
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_lock: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/synchronization/test_nest_lock.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/test_nest_lock.c @@ -0,0 +1,42 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include + +int main() +{ + omp_nest_lock_t nest_lock; + omp_init_nest_lock(&nest_lock); + + omp_test_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + + omp_set_nest_lock(&nest_lock); + omp_test_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + omp_unset_nest_lock(&nest_lock); + + omp_destroy_nest_lock(&nest_lock); + + // Check if libomp supports the callbacks for this test. 
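+ // For the nestable lock, the first acquisition reports acquired_nest_lock_first and re-acquisitions report acquired_nest_lock_next; releases report release_nest_lock_prev until the nesting count drops to zero, which reports release_nest_lock_last.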
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_init_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra={{0x[0-f]+}} + + return 0; +} Index: runtime/test/ompt/synchronization/test_nest_lock_parallel.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/test_nest_lock_parallel.c @@ -0,0 +1,59 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + omp_nest_lock_t nest_lock; + omp_init_nest_lock(&nest_lock); + + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + omp_set_nest_lock(&nest_lock); + print_current_address(1); + } + #pragma omp barrier + omp_test_nest_lock(&nest_lock); //should fail for non-master + print_current_address(2); + #pragma omp barrier + #pragma omp master + { + omp_unset_nest_lock(&nest_lock); + print_current_address(3); + omp_unset_nest_lock(&nest_lock); + print_current_address(4); + } + } + + omp_destroy_nest_lock(&nest_lock); + + // Check if libomp supports the callbacks for this test. 
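+ // Only the master thread holds the nested lock across the barrier, so the worker's omp_test_nest_lock is expected to fail: it reports wait_nest_lock but no acquired_nest_lock_next event.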
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_nest_lock' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID:[0-9]+]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_first: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_prev: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_release_nest_lock_last: wait_id=[[WAIT_ID]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_destroy_nest_lock: wait_id=[[WAIT_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_wait_nest_lock: wait_id=[[WAIT_ID]], hint=0, impl={{[0-9]+}}, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]] + // CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_acquired_nest_lock_next: wait_id=[[WAIT_ID]] + // CHECK-NEXT: {{^}}[[THREAD_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] + + return 0; +} Index: runtime/test/ompt/tasks/dependences.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/dependences.c @@ -0,0 +1,53 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include +#include +#include + +int main() +{ + int x = 0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp task depend(out:x) + { + x++; + usleep(100); + } + print_fuzzy_address(1); + + #pragma omp task depend(in:x) + { + x = -1; + } + } + } + + x++; + + + // Check if libomp supports the callbacks for this test. 
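+ // Both tasks declare a dependence on x, so each task_create (has_dependences=yes) is followed by a task_dependences record with ndeps=1, and the runtime reports a task_dependence_pair linking the out-dependent task to the in-dependent one.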
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence' + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1 + // CHECK: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter=[[NULL]], new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=yes + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1 + // CHECK: {{^}}{{[0-9]+}}: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]] + + + return 0; +} Index: runtime/test/ompt/tasks/explicit_task.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/explicit_task.c @@ -0,0 +1,100 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + OMPT_WAIT(condition,1); + print_ids(0); + } + #pragma omp barrier + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
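+ // The __builtin_frame_address values printed above are used to verify that exit_frame/reenter_frame of the implicit and explicit tasks point into the expected stack frames at every task level.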
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // explicit barrier after master + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], 
task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // this is expected to come earlier and at MASTER: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/tasks/serialized.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/serialized.c @@ -0,0 +1,93 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include +#include + +int main() +{ + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + int t = (int)sin(0.1); + #pragma omp task if(t) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + print_fuzzy_address(1); + print_ids(0); + } + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
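+ // sin(0.1) truncates to 0, so the if(t) clause makes the task undeferred: it runs immediately on the master thread and the two task_schedule events follow the task_create directly.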
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // <- ompt_event_task_schedule ([[IMPLICIT_TASK_ID]], [[TASK_ID]]) would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // <- ompt_event_task_schedule ([[TASK_ID]], [[IMPLICIT_TASK_ID]]) would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], 
exit_frame=[[EXIT]], reen + + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/tasks/task_in_joinbarrier.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/task_in_joinbarrier.c @@ -0,0 +1,90 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + } + OMPT_WAIT(condition,1); + print_ids(0); + } + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
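+ // Unlike explicit_task.c there is no barrier inside the region, so the worker executes the task while it waits in the join barrier: its task_schedule pair and task_end appear between barrier_begin and barrier_end.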
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // 
implicit barrier parallel + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/tasks/task_types.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/task_types.c @@ -0,0 +1,112 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include +#include + + +void print_task_type(int id) +{ + #pragma omp critical + { + int task_type; + char buffer[2048]; + ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL); + format_task_type(task_type, buffer); + printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type); + } +}; + +int main() +{ + //initial task + print_task_type(0); + + int x; + //implicit task + #pragma omp parallel num_threads(1) + { + print_task_type(1); + x++; + } + + #pragma omp parallel num_threads(2) + #pragma omp master + { + //explicit task + #pragma omp task + { + print_task_type(2); + x++; + } + + //explicit task with undeferred + #pragma omp task if(0) + { + print_task_type(3); + x++; + } + + //explicit task with untied + #pragma omp task untied + { + print_task_type(4); + x++; + } + + //explicit task with final + #pragma omp task final(1) + { + print_task_type(5); + x++; + //nested explicit task with final and undeferred + #pragma omp task + { + print_task_type(6); + x++; + } + } + + //Mergeable task test deactivated for now + //explicit task with mergeable + /* + #pragma omp task mergeable if((int)sin(0)) + { + print_task_type(7); + x++; + } + */ + + //TODO: merged task + } + + + + // Check if libomp supports the callbacks for this test. 
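+ // The numeric value printed for each task_type is the task kind combined with its flag bits; e.g. 134217732 is ompt_task_explicit (4) plus the undeferred flag (0x8000000), as checked in the CHECK-DAG lines below.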
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK-NOT: 0: parallel_data initially not null + // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1 + // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit=4 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_untied=268435460, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_untied=268435460 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_final=536870916, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_final=536870916 + + // CHECK-DAG: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK-DAG: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + return 0; +} Index: runtime/test/ompt/tasks/task_types_serialized.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/task_types_serialized.c @@ -0,0 +1,112 @@ +// RUN: %libomp-compile-and-run | FileCheck %s +// REQUIRES: ompt + +#include "callback.h" +#include + +void print_task_type(int id) +{ + #pragma omp critical + { + int task_type; + char buffer[2048]; + ompt_get_task_info(0, &task_type, NULL, NULL, NULL, NULL); + format_task_type(task_type, buffer); + printf("%" PRIu64 ": id=%d task_type=%s=%d\n", ompt_get_thread_data()->value, id, buffer, task_type); + } +}; + +int main() +{ + //initial task + print_task_type(0); + + int x; + //implicit task + #pragma omp parallel num_threads(1) + { + print_task_type(1); + x++; + } + + #pragma omp parallel num_threads(1) + #pragma omp master + { + //explicit task + #pragma omp task + { + print_task_type(2); + x++; + } + + //explicit task with undeferred + #pragma omp task if(0) + { + print_task_type(3); + x++; + } + + //explicit task with untied + #pragma omp task untied + 
{ + print_task_type(4); + x++; + } + + //explicit task with final + #pragma omp task final(1) + { + print_task_type(5); + x++; + //nested explicit task with final and undeferred + #pragma omp task + { + print_task_type(6); + x++; + } + } + +/* + //TODO:not working + //explicit task with mergeable + #pragma omp task mergeable + { + print_task_type(7); + x++; + } +*/ + + //TODO: merged task + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_create: parent_task_id=0, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[NULL]], new_task_id={{[0-9]+}}, codeptr_ra=[[NULL]], task_type=ompt_task_initial=1, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: id=0 task_type=ompt_task_initial=1 + // CHECK: {{^}}[[MASTER_ID]]: id=1 task_type=ompt_task_implicit|ompt_task_undeferred=134217730 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^[0-9]+}}: id=2 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // CHECK: {{^[0-9]+}}: id=3 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188, has_dependences=no + // CHECK: {{^[0-9]+}}: id=4 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_untied=402653188 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK: {{^[0-9]+}}: id=5 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + // CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644, has_dependences=no + // CHECK: {{^[0-9]+}}: id=6 task_type=ompt_task_explicit|ompt_task_undeferred|ompt_task_final=671088644 + + // ___CHECK: {{^[0-9]+}}: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id={{[0-9]+}}, codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no + // ___CHECK: {{^[0-9]+}}: id=7 task_type=ompt_task_explicit|ompt_task_undeferred=134217732 + + return 0; +} Index: runtime/test/ompt/tasks/taskyield.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/taskyield.c @@ -0,0 +1,62 @@ +// 
RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// Current GOMP interface implements taskyield as stub +// XFAIL: gcc + +#include "callback.h" +#include +#include + +int main() +{ + int condition=0, x=0; + #pragma omp parallel num_threads(2) + { + #pragma omp master + { + #pragma omp task shared(condition) + { + OMPT_SIGNAL(condition); + OMPT_WAIT(condition,2); + } + OMPT_WAIT(condition,1); + #pragma omp task shared(x) + { + x++; + } + printf("%" PRIu64 ": before yield\n", ompt_get_thread_data()->value); + #pragma omp taskyield + printf("%" PRIu64 ": after yield\n", ompt_get_thread_data()->value); + OMPT_SIGNAL(condition); + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID:[0-9]+]], team_size={{[0-9]+}}, thread_num={{[0-9]+}} + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[WORKER_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[MAIN_TASK:[0-9]+]], codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, has_dependences=no + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1 + + + + + + return 0; +} Index: runtime/test/ompt/tasks/untied_task.c =================================================================== --- /dev/null +++ runtime/test/ompt/tasks/untied_task.c @@ -0,0 +1,107 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" +#include + +int main() +{ + int condition=0; + omp_set_nested(0); + print_frame(0); + #pragma omp parallel num_threads(2) + { + print_frame(1); + print_ids(0); + print_ids(1); + print_frame(0); + #pragma omp master + { + print_ids(0); + #pragma omp task untied shared(condition) + { + OMPT_SIGNAL(condition); + print_frame(1); + print_ids(0); + print_ids(1); + print_ids(2); + #pragma omp task if(0) + { + print_ids(0); + print_ids(1); + print_ids(2); + } + print_ids(0); + print_ids(1); + print_ids(2); + } + OMPT_WAIT(condition,1); + print_ids(0); + } + #pragma omp barrier + print_ids(0); + } + + + // Check if libomp supports the callbacks for this test. 
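+ // Apart from the untied clause (and the nested undeferred task), the expected sequence matches explicit_task.c: the worker picks the task up while waiting at the explicit barrier and reports the task_schedule pair there.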
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + + + // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + + // make sure initial data pointers are null + // CHECK-NOT: 0: new_task_data initially not null + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // nested parallel masters + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // <- ompt_event_task_create would be expected here + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // explicit barrier after master + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // implicit barrier parallel + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + // CHECK: {{^}}[[THREAD_ID]]: 
__builtin_frame_address(1)=[[EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // this is expected to come earlier and at MASTER: + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] + + + + return 0; +} Index: runtime/test/ompt/worksharing/for/auto_split.c =================================================================== --- /dev/null +++ runtime/test/ompt/worksharing/for/auto_split.c @@ -0,0 +1,8 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h +// REQUIRES: ompt +// GCC doesn't call runtime for auto = static schedule +// XFAIL: gcc + +#define SCHEDULE auto +#include "base_split.h" Index: runtime/test/ompt/worksharing/for/base.h =================================================================== --- runtime/test/ompt/worksharing/for/base.h +++ runtime/test/ompt/worksharing/for/base.h @@ -9,28 +9,35 @@ for (i = 0; i < 4; i++) { } + // Check if libomp supports the callbacks for this test. 
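+ // Each of the four threads reports loop_begin/loop_end (the ompt_callback_work pair) inside its implicit task.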
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
 
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, parallel_function=0x{{[0-f]+}}, invoker={{.*}}
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
 
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_implicit_task_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
 
   return 0;
 }
Index: runtime/test/ompt/worksharing/for/base_serialized.h
===================================================================
--- runtime/test/ompt/worksharing/for/base_serialized.h
+++ runtime/test/ompt/worksharing/for/base_serialized.h
@@ -8,14 +8,21 @@
   #pragma omp parallel for num_threads(1) schedule(SCHEDULE)
   for (i = 0; i < 1; i++) {
   }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
   // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
 
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, parallel_function=0x{{[0-f]+}}, invoker={{.+}}
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=1, codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
 
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], workshare_function=0x{{[0-f]+}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[IMPLICIT_TASK_ID]], codeptr_ra=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end: parallel_id={{[PARALLEL_ID,0]}}, task_id=[[IMPLICIT_TASK_ID]]
 
   return 0;
 }
Index: runtime/test/ompt/worksharing/for/base_split.h
===================================================================
--- /dev/null
+++ runtime/test/ompt/worksharing/for/base_split.h
@@ -0,0 +1,66 @@
+#include "callback.h"
+#include <omp.h>
+
+/* With the combined parallel-for construct (base.h), the return-addresses are hard to compare.
+   With the separate parallel and for-nowait construct, the addresses become more predictable,
+   but the begin of the for-loop still generates additional code, so the offset of loop-begin
+   to the label is >4 Byte.
+*/
+
+int main()
+{
+  unsigned int i;
+
+  #pragma omp parallel num_threads(4)
+  {
+    print_current_address(0);
+    #pragma omp for schedule(SCHEDULE) nowait
+    for (i = 0; i < 4; i++) {
+      print_fuzzy_address(1);
+    }
+    print_fuzzy_address(2);
+  }
+  print_fuzzy_address(3);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task'
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[PARALLEL_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker={{[0-9]+}}
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, invoker={{[0-9]+}}, codeptr_ra=[[PARALLEL_RETURN_ADDRESS]]
+  // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[PARALLEL_RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_loop_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[LOOP_END_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[THREAD_ID]]: fuzzy_address={{.*}}[[LOOP_END_RETURN_ADDRESS]]
+
+
+  // CHECK-LOOP: 0: NULL_POINTER=[[NULL:.*$]]
+  // CHECK-LOOP: 0: ompt_event_runtime_shutdown
+  // CHECK-LOOP: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[NULL]], parent_task_frame.reenter={{0x[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra={{0x[0-f]+}}, invoker={{[0-9]+}}
+  // CHECK-LOOP: {{^}}[[MASTER_ID]]: ompt_event_loop_begin: parallel_id=[[PARALLEL_ID]], parent_task_id={{[0-9]+}}, codeptr_ra=[[LOOP_BEGIN_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+  // CHECK-LOOP: {{^}}{{[0-9]+}}: fuzzy_address={{.*}}[[LOOP_BEGIN_RETURN_ADDRESS]]
+
+
+  return 0;
+}
Index: runtime/test/ompt/worksharing/for/dynamic_split.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/worksharing/for/dynamic_split.c
@@ -0,0 +1,6 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+
+#define SCHEDULE dynamic
+#include "base_split.h"
Index: runtime/test/ompt/worksharing/for/guided_split.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/worksharing/for/guided_split.c
@@ -0,0 +1,6 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+
+#define SCHEDULE guided
+#include "base_split.h"
Index: runtime/test/ompt/worksharing/for/runtime_split.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/worksharing/for/runtime_split.c
@@ -0,0 +1,6 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+
+#define SCHEDULE runtime
+#include "base_split.h"
Index: runtime/test/ompt/worksharing/for/static_split.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/worksharing/for/static_split.c
@@ -0,0 +1,8 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %S/base_split.h
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck --check-prefix=CHECK-LOOP %S/base_split.h
+// REQUIRES: ompt
+// GCC doesn't call runtime for static schedule
+// XFAIL: gcc
+
+#define SCHEDULE static
+#include "base_split.h"
Index: runtime/test/ompt/worksharing/sections.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/worksharing/sections.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not distinguish between sections and loops
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  #pragma omp parallel sections num_threads(2)
+  {
+    #pragma omp section
+    {
+      printf("%lu: section 1\n", ompt_get_thread_data()->value);
+    }
+    #pragma omp section
+    {
+      printf("%lu: section 2\n", ompt_get_thread_data()->value);
+    }
+  }
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN:0x[0-f]+]], count=2
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END:0x[0-f]+]]
+
+  // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_sections_begin: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[SECT_BEGIN]], count=2
+  // CHECK: {{^}}[[THREAD_ID]]: ompt_event_sections_end: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, codeptr_ra=[[SECT_END]]
+
+
+  return 0;
+}
Index: runtime/test/ompt/worksharing/single.c
===================================================================
--- /dev/null
+++ runtime/test/ompt/worksharing/single.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+// GCC generates code that does not call the runtime for the single construct
+// XFAIL: gcc
+
+#include "callback.h"
+#include <omp.h>
+
+int main()
+{
+  int x = 0;
+  #pragma omp parallel num_threads(2)
+  {
+    #pragma omp single
+    {
+      x++;
+    }
+  }
+
+  printf("x=%d\n", x);
+
+  // Check if libomp supports the callbacks for this test.
+  // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work'
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[THREAD_ID_1:[0-9]+]]: ompt_event_single_in_block_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], parent_task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
+  // CHECK: {{^}}[[THREAD_ID_1]]: ompt_event_single_in_block_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
+
+  // CHECK: {{^}}[[THREAD_ID_2:[0-9]+]]: ompt_event_single_others_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]+}}, count=1
+  // CHECK: {{^}}[[THREAD_ID_2]]: ompt_event_single_others_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]+}}, count=1
+
+
+
+  return 0;
+}
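Editorial note, not part of the patch above: the worksharing tests match output that the test harness (runtime/test/ompt/callback.h) prints from the OMPT ompt_callback_work callback, which fires at the begin and end of loop, sections, and single constructs and reports the construct kind, the parallel/task data, and codeptr_ra, the runtime-entry return address used by the fuzzy-address checks in base_split.h. The sketch below shows roughly what such a tool-side handler and its registration look like. It assumes the OpenMP 5.0 omp-tools.h spellings (the TR4-era ompt.h this patch targets names a few types slightly differently), and the names on_ompt_callback_work, my_initialize, and my_finalize are illustrative, not the harness's.

// Illustrative sketch only; assumes the OpenMP 5.0 omp-tools.h interface.
// The real harness used by these tests is runtime/test/ompt/callback.h.
#include <stdio.h>
#include <inttypes.h>
#include <omp-tools.h>

// Fired at the begin/end of loop, sections and single constructs; codeptr_ra
// is the runtime-entry return address the fuzzy-address checks compare.
static void on_ompt_callback_work(ompt_work_t wstype,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data, uint64_t count,
                                  const void *codeptr_ra) {
  const char *kind = wstype == ompt_work_loop              ? "loop"
                     : wstype == ompt_work_sections        ? "sections"
                     : wstype == ompt_work_single_executor ? "single_in_block"
                     : wstype == ompt_work_single_other    ? "single_others"
                                                           : "other";
  printf("work_%s_%s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
         ", count=%" PRIu64 ", codeptr_ra=%p\n",
         kind, endpoint == ompt_scope_begin ? "begin" : "end",
         parallel_data ? parallel_data->value : 0,
         task_data ? task_data->value : 0, count, codeptr_ra);
}

// Registration step; the "Could not register callback" CHECK-NOT lines above
// guard exactly this kind of failure in the harness.
static int my_initialize(ompt_function_lookup_t lookup, int initial_device_num,
                         ompt_data_t *tool_data) {
  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  if (ompt_set_callback(ompt_callback_work,
                        (ompt_callback_t)&on_ompt_callback_work) ==
      ompt_set_never)
    printf("0: Could not register callback 'ompt_callback_work'\n");
  return 1; // non-zero keeps the tool active
}

static void my_finalize(ompt_data_t *tool_data) {}

ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {&my_initialize, &my_finalize, {0}};
  return &result;
}

In the tests above, the tool is compiled directly into the test binary by including callback.h; a standalone tool built along these lines would more typically be loaded through the OpenMP 5.0 OMP_TOOL_LIBRARIES mechanism.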