diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -47,6 +47,20 @@ set (LIBOMPTARGET_SYSTEM_TARGETS "") set (LIBOMPTARGET_TESTED_PLUGINS "") +# Check OMPT support: the default is TRUE only when libomp reports OMPT support and the build is not for Windows. +set(OMPT_DEFAULT_IN_LIBOMPTARGET FALSE) +if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (NOT WIN32)) + set(OMPT_DEFAULT_IN_LIBOMPTARGET TRUE) +endif() +set(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET ${OMPT_DEFAULT_IN_LIBOMPTARGET} CACHE BOOL + "OMPT-support in libomptarget?") +if(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET) + add_definitions(-DOMPT_SUPPORT=1) + message(STATUS "OMPT enabled in libomptarget") +else() + message(STATUS "OMPT disabled in libomptarget") +endif() + # Check whether using debug mode. In debug mode, allow dumping progress # messages at runtime by default. Otherwise, it can be enabled # independently using the LIBOMPTARGET_ENABLE_DEBUG option. diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -20,10 +20,13 @@ ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp ) -set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE) +if(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET) + list(APPEND LIBOMPTARGET_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ompt-target.cpp) + list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LIBOMP_INCLUDE_DIR}) +endif() +set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE) include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS}) - # Build libomptarget library with libdl dependency. Add LLVMSupport # dependency if building in-tree with profiling enabled. 
if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING)) @@ -35,10 +38,15 @@ set(LLVM_LINK_COMPONENTS Support ) - add_llvm_library(omptarget SHARED ${LIBOMPTARGET_SRC_FILES} + if(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET) + add_llvm_library(omptarget SHARED ${LIBOMPTARGET_SRC_FILES} LINK_LIBS ${CMAKE_DL_LIBS} - "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports" - ) + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports") + else() # NOTE(review): ompt-target.cpp is not in LIBOMPTARGET_SRC_FILES on this path; presumably that is why PARTIAL_SOURCES_INTENDED is passed - confirm + add_llvm_library(omptarget SHARED ${LIBOMPTARGET_SRC_FILES} PARTIAL_SOURCES_INTENDED + LINK_LIBS ${CMAKE_DL_LIBS} + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports") + endif() target_compile_definitions(omptarget PUBLIC OMPTARGET_PROFILE_ENABLED) endif() diff --git a/openmp/libomptarget/src/ompt-target.h b/openmp/libomptarget/src/ompt-target.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/ompt-target.h @@ -0,0 +1,49 @@ +#ifndef __OMPT_TARGET_H__ +#define __OMPT_TARGET_H__ + +#include "omp-tools.h" + +#define THREAD_LOCAL __thread + +#define ompt_callback(e) e##_callback /* name of the struct field that stores the callback for event e */ + +// The following two types of structs are used to pass target-related OMPT callbacks to libomptarget. 
The structs' definitions +// should be in sync with the definitions in openmp/runtime/src/ompt-internal.h + +/* Struct to collect target callback pointers */ +typedef struct ompt_target_callbacks_internal_s { +#define ompt_event_macro(event, callback, eventid) \ + callback ompt_callback(event); + + FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_target_callbacks_internal_t; + +/* Bitmap to mark OpenMP 5.1 target events as registered */ +typedef struct ompt_target_callbacks_active_s { + unsigned int enabled : 1; +#define ompt_event_macro(event, callback, eventid) unsigned int event : 1; + + FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_target_callbacks_active_t; + +/* Struct to collect the runtime entry points handed to libomptarget */ +typedef struct ompt_target_entry_point_s { + ompt_get_task_info_t ompt_get_task_info; +} ompt_target_entry_point_t; + +/* Thread local storage for OMPT */ +typedef struct { + bool nowait; + ompt_data_t th_target; +} ompt_tls; + +extern ompt_target_callbacks_internal_t ompt_target_callbacks; + +extern ompt_target_callbacks_active_t ompt_target_enabled; + +extern ompt_target_entry_point_t ompt_target_entry_points; +#endif // __OMPT_TARGET_H__ diff --git a/openmp/libomptarget/src/ompt-target.cpp b/openmp/libomptarget/src/ompt-target.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/ompt-target.cpp @@ -0,0 +1,7 @@ +#include "ompt-target.h" + +ompt_target_callbacks_active_t ompt_target_enabled; + +ompt_target_callbacks_internal_t ompt_target_callbacks; + +ompt_target_entry_point_t ompt_target_entry_points; diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -14,6 +14,10 @@ #include "device.h" #include "private.h" +#if OMPT_SUPPORT +#include "ompt-target.h" +#endif + #include #include #include @@ -38,6 +42,14 @@ static char *ProfileTraceFile = nullptr; #endif +#if OMPT_SUPPORT 
+static bool ompt_initialized = false; /* set to true once init() has attempted the OMPT handshake below */ + +typedef bool (*libomptarget_start_tool_t)(ompt_target_callbacks_active_t *libomptarget_ompt_enabled, + ompt_target_callbacks_internal_t *libomptarget_ompt_callbacks, + ompt_target_entry_point_t *libomptarget_ompt_entry_points); /* type of the "libomptarget_start_tool" symbol that init() looks up with dlsym */ +#endif + __attribute__((constructor(101))) void init() { DP("Init target library!\n"); PM = new PluginManager(); @@ -48,6 +60,23 @@ if (ProfileTraceFile) llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget"); #endif + +#if OMPT_SUPPORT + DP("OMPT_SUPPORT is enabled in libomptarget\n"); + if (!ompt_initialized) { + DP("Init OMPT for libomptarget\n"); + libomptarget_start_tool_t start_tool = + (libomptarget_start_tool_t)dlsym(RTLD_DEFAULT, "libomptarget_start_tool"); /* stays null when the symbol cannot be resolved; OMPT state is then left untouched */ + if (start_tool) { + DP("Retrieve libomptarget_start_tool successfully\n"); + if (!start_tool(&ompt_target_enabled, &ompt_target_callbacks, &ompt_target_entry_points)) { + DP("Turn off OMPT in libomptarget because libomptarget_start_tool returns false\n"); + memset(&ompt_target_enabled, 0, sizeof(ompt_target_enabled)); + } + } + ompt_initialized = true; + } +#endif } __attribute__((destructor(101))) void deinit() { diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -312,6 +312,7 @@ set(LIBOMP_HAVE_OMPT_SUPPORT FALSE) endif() endif() +set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE) # Check if HWLOC support is available if(${LIBOMP_USE_HWLOC}) diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var --- a/openmp/runtime/src/include/omp-tools.h.var +++ b/openmp/runtime/src/include/omp-tools.h.var @@ -108,7 +108,7 @@ macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ -#define FOREACH_OMPT_EVENT(macro) \ +#define FOREACH_OMPT_HOST_EVENT(macro) \ \ 
/*--- Mandatory Events ---*/ \ macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ @@ -121,17 +121,9 @@ macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ \ - macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ - macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ - macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ \ macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ \ - macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ - macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ - \ - macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ - macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ \ /* Optional Events */ \ macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \ @@ -145,7 +137,6 @@ \ macro (ompt_callback_masked, ompt_callback_masked_t, 21) /* task at masked begin or end */ \ \ - macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ \ macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ \ @@ -164,11 +155,48 @@ macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \ \ macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ \ + macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */ + + +#define FOREACH_OMPT_DEVICE_EVENT(macro) /* device initialize/finalize/load/unload events (ids 12-15) */ \ + macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ + macro (ompt_callback_device_finalize, 
ompt_callback_device_finalize_t, 13) /* device finalize */ \ + \ + macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ + macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ + + +#define FOREACH_OMPT_NOEMI_EVENT(macro) /* target events without the _emi suffix (ids 8-10, 22) */ \ + macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ + macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ + macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ + macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ + + +#define FOREACH_OMPT_EMI_EVENT(macro) /* _emi variants of the target events (ids 33-36) */ \ macro (ompt_callback_target_emi, ompt_callback_target_emi_t, 33) /* target */ \ macro (ompt_callback_target_data_op_emi,ompt_callback_target_data_op_emi_t,34) /* target data op */ \ macro (ompt_callback_target_submit_emi, ompt_callback_target_submit_emi_t, 35) /* target submit */ \ macro (ompt_callback_target_map_emi, ompt_callback_target_map_emi_t, 36) /* target map */ \ - macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */ + +#define FOREACH_OMPT_50_TARGET_EVENT(macro) /* device events + non-EMI target events */ \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) + +#define FOREACH_OMPT_51_TARGET_EVENT(macro) /* device events + EMI target events */ \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +#define FOREACH_OMPT_EVENT(macro) /* every event: host, device, non-EMI and EMI */ \ + FOREACH_OMPT_HOST_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +#define FOREACH_OMPT_51_EVENT(macro) /* host, device and EMI events (no non-EMI target events) */ \ + FOREACH_OMPT_HOST_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) /***************************************************************************** * implementation specific types diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h --- a/openmp/runtime/src/ompt-event-specific.h +++ 
b/openmp/runtime/src/ompt-event-specific.h @@ -55,12 +55,12 @@ #define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_emi_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_target_implemented ompt_event_MAY_ALWAYS /* NOTE(review): the callback-registration switch changed by this patch only adds cases for host events and device + EMI target events; confirm the non-EMI target events can really be registered before advertising them as implemented */ +#define ompt_callback_target_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_emi_implemented ompt_event_MAY_ALWAYS #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS @@ -86,8 +86,8 @@ #define ompt_callback_masked_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_map_emi_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_target_map_implemented ompt_event_MAY_ALWAYS_OPTIONAL +#define ompt_callback_target_map_emi_implemented ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -23,6 +23,7 @@ #if KMP_OS_UNIX #include #endif +#include "kmp.h" /***************************************************************************** * ompt include files @@ -86,6 +87,8 @@ ompt_callbacks_active_t ompt_enabled; +ompt_target_callbacks_active_t ompt_target_enabled; /* one "registered" bit per target event, set when a non-null callback is registered */ + 
ompt_state_info_t ompt_state_info[] = { #define ompt_state_macro(state, code) {#state, state}, FOREACH_OMPT_STATE(ompt_state_macro) @@ -100,6 +103,8 @@ ompt_callbacks_internal_t ompt_callbacks; +ompt_target_callbacks_internal_t ompt_target_callbacks; /* target callback pointers, stored separately from the host ompt_callbacks */ + static ompt_start_tool_result_t *ompt_start_tool_result = NULL; #if KMP_OS_WINDOWS @@ -125,6 +130,29 @@ typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(unsigned int, const char *); +_OMP_EXTERN OMPT_WEAK_ATTRIBUTE bool libomptarget_start_tool( + ompt_target_callbacks_active_t *libomptarget_ompt_enabled, + ompt_target_callbacks_internal_t *libomptarget_ompt_callbacks, + ompt_target_entry_point_t *libomptarget_ompt_entry_points) { /* Copies this runtime's target OMPT state into the three caller-provided structs; returns true only when ompt_enabled.enabled is set. The pointers are dereferenced without null checks. */ + if (!TCR_4(__kmp_init_middle)) { /* run middle initialization first if it has not happened yet */ + __kmp_middle_initialize(); + } + bool ret = false; + libomptarget_ompt_enabled->enabled = ompt_enabled.enabled; /* always mirrored, even when OMPT is off */ + if (ompt_enabled.enabled) { + ret = true; +#define ompt_event_macro(event_name, callback_type, event_id) \ + libomptarget_ompt_enabled->event_name = ompt_target_enabled.event_name; \ + libomptarget_ompt_callbacks->ompt_callback(event_name) = \ + ompt_target_callbacks.ompt_callback(event_name); + + FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro) /* copies the enabled bit and callback pointer of every device and EMI target event */ +#undef ompt_event_macro + libomptarget_ompt_entry_points->ompt_get_task_info = __ompt_get_task_info_internal; + + } + return ret; +} #if KMP_OS_DARWIN // While Darwin supports weak symbols, the library that wishes to provide a new @@ -567,7 +595,22 @@ else \ return ompt_set_always; - FOREACH_OMPT_EVENT(ompt_event_macro) + FOREACH_OMPT_HOST_EVENT(ompt_event_macro) + +#undef ompt_event_macro + +#define ompt_event_macro(event_name, callback_type, event_id) \ + case event_name: \ + ompt_target_callbacks.ompt_callback(event_name) = (callback_type)callback; \ + ompt_target_enabled.event_name = (callback != 0); \ + if (callback) \ + return ompt_event_implementation_status(event_name); \ + else \ + return ompt_set_always; + + FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro) + +#undef ompt_event_macro #undef 
ompt_event_macro @@ -594,8 +637,39 @@ return ompt_get_callback_failure; \ } - FOREACH_OMPT_EVENT(ompt_event_macro) + FOREACH_OMPT_HOST_EVENT(ompt_event_macro) +#undef ompt_event_macro + +#define ompt_event_macro(event_name, callback_type, event_id) \ + case event_name: { \ + ompt_callback_t mycb = \ + (ompt_callback_t)ompt_target_callbacks.ompt_callback(event_name); \ + if (ompt_target_enabled.event_name && mycb) { \ + *callback = mycb; \ + return ompt_get_callback_success; \ + } \ + return ompt_get_callback_failure; \ + } + + FOREACH_OMPT_DEVICE_EVENT(ompt_event_macro) /* device events are looked up in the target callback table */ + +#undef ompt_event_macro + +#define ompt_event_macro(event_name, callback_type, event_id) \ + case ompt_emi_event(event_name): { \ + ompt_callback_t mycb = \ + (ompt_callback_t)ompt_target_callbacks.ompt_emi_callback(event_name); \ + if (ompt_target_enabled.ompt_emi_event(event_name) && mycb) { \ + *callback = mycb; \ + return ompt_get_callback_success; \ + } \ + return ompt_get_callback_failure; \ + } + + FOREACH_OMPT_NOEMI_EVENT(ompt_event_macro) /* iterates the non-EMI names; ompt_emi_event() turns each one into the case label of its _emi variant */ + +#undef ompt_event_macro #undef ompt_event_macro default: diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -26,24 +26,59 @@ #define ompt_callback(e) e##_callback +#define ompt_emi_callback(e) e##_emi_callback /* callback field name of the _emi variant of event e */ + +#define ompt_emi_callback_type(e) e##_emi_t /* callback type name of the _emi variant of event e */ + +#define ompt_emi_wrapper(e) e##_emi_wrapper + +#define ompt_emi_event(e) e##_emi /* name of the _emi variant of event e */ + +/* Struct to collect host callback pointers */ typedef struct ompt_callbacks_internal_s { #define ompt_event_macro(event, callback, eventid) \ callback ompt_callback(event); - FOREACH_OMPT_EVENT(ompt_event_macro) + FOREACH_OMPT_HOST_EVENT(ompt_event_macro) #undef ompt_event_macro } ompt_callbacks_internal_t; +/* Struct to collect target callback pointers */ +typedef struct ompt_target_callbacks_internal_s { +#define ompt_event_macro(event, callback, eventid) \ + callback ompt_callback(event); + + 
FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_target_callbacks_internal_t; + +/* Bitmap to mark OpenMP 5.1 host events as registered */ typedef struct ompt_callbacks_active_s { unsigned int enabled : 1; #define ompt_event_macro(event, callback, eventid) unsigned int event : 1; - FOREACH_OMPT_EVENT(ompt_event_macro) + FOREACH_OMPT_HOST_EVENT(ompt_event_macro) #undef ompt_event_macro } ompt_callbacks_active_t; +/* Bitmap to mark OpenMP 5.1 target events as registered; the layout must stay identical to the copy in libomptarget/src/ompt-target.h */ +typedef struct ompt_target_callbacks_active_s { + unsigned int enabled : 1; +#define ompt_event_macro(event, callback, eventid) unsigned int event : 1; + + FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_target_callbacks_active_t; + +/* Struct to collect the entry points passed to libomptarget; the layout must stay identical to the copy in libomptarget/src/ompt-target.h */ +typedef struct ompt_target_entry_point_s { + ompt_get_task_info_t ompt_get_task_info; +} ompt_target_entry_point_t; + #define TASK_TYPE_DETAILS_FORMAT(info) \ ((info->td_flags.task_serial || info->td_flags.tasking_ser) \ ? ompt_task_undeferred \ @@ -84,6 +119,7 @@ } ompt_thread_info_t; extern ompt_callbacks_internal_t ompt_callbacks; +extern ompt_target_callbacks_internal_t ompt_target_callbacks; #if OMPT_SUPPORT && OMPT_OPTIONAL #if USE_FAST_MEMORY @@ -109,6 +145,7 @@ int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg); extern ompt_callbacks_active_t ompt_enabled; +extern ompt_target_callbacks_active_t ompt_target_enabled; #if KMP_OS_WINDOWS #define UNLIKELY(x) (x)