diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt
--- a/openmp/libomptarget/CMakeLists.txt
+++ b/openmp/libomptarget/CMakeLists.txt
@@ -48,6 +48,23 @@
 set (LIBOMPTARGET_SYSTEM_TARGETS "")
 set (LIBOMPTARGET_TESTED_PLUGINS "")
 
+# Check OMPT support: enabled by default when libomp was configured with OMPT
+# support and the host is not Windows; override with the cache variable below.
+set(OMPT_DEFAULT_IN_LIBOMPTARGET FALSE)
+if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (NOT WIN32))
+  set(OMPT_DEFAULT_IN_LIBOMPTARGET TRUE)
+endif()
+set(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET ${OMPT_DEFAULT_IN_LIBOMPTARGET} CACHE BOOL
+  "OMPT-support in libomptarget?")
+if(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET)
+  # rtl.cpp guards its OMPT code with `#if OMPT_SUPPORT`; without this
+  # definition nothing enabled by this option would ever be compiled.
+  add_definitions(-DOMPT_SUPPORT=1)
+  message(STATUS "OMPT enabled in libomptarget")
+else()
+  message(STATUS "OMPT disabled in libomptarget")
+endif()
+
 # Check whether using debug mode. In debug mode, allow dumping progress
 # messages at runtime by default. Otherwise, it can be enabled
 # independently using the LIBOMPTARGET_ENABLE_DEBUG option.
diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt
--- a/openmp/libomptarget/src/CMakeLists.txt
+++ b/openmp/libomptarget/src/CMakeLists.txt
@@ -20,8 +20,12 @@
   ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp
 )
 
-set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE)
+if(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET)
+  list(APPEND LIBOMPTARGET_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ompt-target.cpp)
+  list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LIBOMP_INCLUDE_DIR})
+endif()
 
+set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE)
 include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS})
 
 # Build libomptarget library with libdl dependency.
diff --git a/openmp/libomptarget/src/ompt-target.h b/openmp/libomptarget/src/ompt-target.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/src/ompt-target.h
@@ -0,0 +1,31 @@
+#ifndef LIBOMPTARGET_OMPT_TARGET_H
+#define LIBOMPTARGET_OMPT_TARGET_H
+
+#include "omp-tools.h"
+
+#define _OMP_EXTERN extern "C"
+
+#define OMPT_WEAK_ATTRIBUTE __attribute__((weak))
+
+// The following struct is used to pass the registration state of the
+// target-related OMPT callbacks to libomptarget. Its definition must stay in
+// sync with the one in openmp/runtime/src/ompt-internal.h.
+
+/* Bitmap to mark OpenMP 5.1 target events as registered */
+typedef struct ompt_target_callbacks_active_s {
+  unsigned int enabled : 1;
+#define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
+
+  FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_target_callbacks_active_t;
+
+extern ompt_target_callbacks_active_t ompt_target_enabled;
+
+// Entry point implemented by libomp. Declared weak so that rtl.cpp can test
+// the symbol for null before calling it.
+_OMP_EXTERN OMPT_WEAK_ATTRIBUTE bool
+libomp_start_tool(ompt_target_callbacks_active_t *libomptarget_ompt_enabled);
+
+#endif // LIBOMPTARGET_OMPT_TARGET_H
diff --git a/openmp/libomptarget/src/ompt-target.cpp b/openmp/libomptarget/src/ompt-target.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/src/ompt-target.cpp
@@ -0,0 +1,3 @@
+#include "ompt-target.h"
+
+ompt_target_callbacks_active_t ompt_target_enabled;
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -14,6 +14,10 @@
 #include "device.h"
 #include "private.h"
 
+#if OMPT_SUPPORT
+#include "ompt-target.h"
+#endif
+
 #include <cassert>
 #include <cstdlib>
 #include <cstring>
@@ -181,6 +185,21 @@
         dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
   }
 
+#if OMPT_SUPPORT
+  DP("OMPT_SUPPORT is enabled in libomptarget\n");
+  DP("Init OMPT for libomptarget\n");
+  // libomp_start_tool is declared weak in ompt-target.h, so test the symbol
+  // before calling it.
+  if (libomp_start_tool) {
+    DP("Retrieve libomp_start_tool successfully\n");
+    if (!libomp_start_tool(&ompt_target_enabled)) {
+      DP("Turn off OMPT in libomptarget because libomp_start_tool returns "
+         "false\n");
+      memset(&ompt_target_enabled, 0, sizeof(ompt_target_enabled));
+    }
+  }
+#endif
+
   DP("RTLs loaded!\n");
 
   return;
diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake
--- a/openmp/runtime/cmake/config-ix.cmake
+++ b/openmp/runtime/cmake/config-ix.cmake
@@ -329,6 +329,8 @@
     set(LIBOMP_HAVE_OMPT_SUPPORT FALSE)
   endif()
 endif()
+# Export the result so that libomptarget can derive its OMPT default from it.
+set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
 
 # Check if HWLOC support is available
 if(${LIBOMP_USE_HWLOC})
diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt
--- a/openmp/runtime/src/exports_so.txt
+++ b/openmp/runtime/src/exports_so.txt
@@ -25,7 +25,9 @@
         #
         # OMPT API
         #
-        ompt_start_tool;           # OMPT start interface
+        ompt_start_tool;     # OMPT start interface
+        libomp_start_tool;   # OMPT start interface for libomptarget
+        libomp_ompt_*;       # OMPT callback functions
 
         ompc_*;    # omp.h renames some standard functions to ompc_*.
         kmp_*;     # Intel extensions.
diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var
--- a/openmp/runtime/src/include/omp-tools.h.var
+++ b/openmp/runtime/src/include/omp-tools.h.var
@@ -108,7 +108,7 @@
     macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \
     macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */
 
-#define FOREACH_OMPT_EVENT(macro) \
+#define FOREACH_OMPT_HOST_EVENT(macro) \
     \
     /*--- Mandatory Events ---*/ \
     macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \
@@ -121,17 +121,9 @@
     macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \
     macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \
     \
-    macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \
-    macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \
-    macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \
     \
     macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \
     \
-    macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \
-    macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \
-    \
-    macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \
-    macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \
     \
     /* Optional Events */ \
     macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \
@@ -145,7 +137,6 @@
     \
     macro (ompt_callback_masked, ompt_callback_masked_t, 21) /* task at masked begin or end */ \
     \
-    macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \
     \
     macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \
     \
@@ -164,11 +155,52 @@
     macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \
     \
     macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ \
+    macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */
+
+/* Device lifetime and code-loading events (event ids 12-15). */
+#define FOREACH_OMPT_DEVICE_EVENT(macro) \
+    macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \
+    macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \
+    \
+    macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \
+    macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */
+
+/* Target events in their non-EMI form (event ids 8-10 and 22). */
+#define FOREACH_OMPT_NOEMI_EVENT(macro) \
+    macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \
+    macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \
+    macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \
+    macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */
+
+/* Target events in their EMI form (event ids 33-36). */
+#define FOREACH_OMPT_EMI_EVENT(macro) \
     macro (ompt_callback_target_emi, ompt_callback_target_emi_t, 33) /* target */ \
     macro (ompt_callback_target_data_op_emi,ompt_callback_target_data_op_emi_t,34) /* target data op */ \
     macro (ompt_callback_target_submit_emi, ompt_callback_target_submit_emi_t, 35) /* target submit */ \
-    macro (ompt_callback_target_map_emi, ompt_callback_target_map_emi_t, 36) /* target map */ \
-    macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */
+    macro (ompt_callback_target_map_emi, ompt_callback_target_map_emi_t, 36) /* target map */
+
+/* Device events plus the non-EMI target events. */
+#define FOREACH_OMPT_50_TARGET_EVENT(macro) \
+    FOREACH_OMPT_DEVICE_EVENT(macro) \
+    FOREACH_OMPT_NOEMI_EVENT(macro)
+
+/* Device events plus the EMI target events. */
+#define FOREACH_OMPT_51_TARGET_EVENT(macro) \
+    FOREACH_OMPT_DEVICE_EVENT(macro) \
+    FOREACH_OMPT_EMI_EVENT(macro)
+
+/* Every event: host, device, non-EMI target and EMI target. */
+#define FOREACH_OMPT_EVENT(macro) \
+    FOREACH_OMPT_HOST_EVENT(macro) \
+    FOREACH_OMPT_DEVICE_EVENT(macro) \
+    FOREACH_OMPT_NOEMI_EVENT(macro) \
+    FOREACH_OMPT_EMI_EVENT(macro)
+
+/* Every event except the non-EMI target events. */
+#define FOREACH_OMPT_51_EVENT(macro) \
+    FOREACH_OMPT_HOST_EVENT(macro) \
+    FOREACH_OMPT_DEVICE_EVENT(macro) \
+    FOREACH_OMPT_EMI_EVENT(macro)
 
 /*****************************************************************************
  * implementation specific types
diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h
--- a/openmp/runtime/src/ompt-event-specific.h
+++ b/openmp/runtime/src/ompt-event-specific.h
@@ -86,8 +86,8 @@
 
 #define ompt_callback_masked_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
-#define ompt_callback_target_map_implemented ompt_event_UNIMPLEMENTED
-#define ompt_callback_target_map_emi_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_target_map_implemented ompt_event_MAY_ALWAYS_OPTIONAL
+#define ompt_callback_target_map_emi_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
 #define ompt_callback_sync_region_implemented ompt_event_MAY_ALWAYS_OPTIONAL
 
diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -86,6 +86,8 @@
 
 ompt_callbacks_active_t ompt_enabled;
 
+ompt_target_callbacks_active_t ompt_target_enabled;
+
 ompt_state_info_t ompt_state_info[] = {
 #define ompt_state_macro(state, code) {#state, state},
     FOREACH_OMPT_STATE(ompt_state_macro)
@@ -100,6 +102,10 @@
 
 ompt_callbacks_internal_t ompt_callbacks;
 
+ompt_target_callbacks_internal_t ompt_target_callbacks;
+
+ompt_callbacks_internal_noemi_t ompt_callbacks_noemi;
+
 static ompt_start_tool_result_t *ompt_start_tool_result = NULL;
 
 #if KMP_OS_WINDOWS
@@ -125,6 +131,60 @@
 typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(unsigned int,
                                                        const char *);
 
+// Entry point used by libomptarget to query the OMPT state of libomp. Copies
+// the registration bits of the OpenMP 5.1 target events into
+// *libomptarget_ompt_enabled and returns true iff OMPT is enabled.
+_OMP_EXTERN OMPT_WEAK_ATTRIBUTE bool
+libomp_start_tool(ompt_target_callbacks_active_t *libomptarget_ompt_enabled) {
+  // No bitmap to fill in: report "disabled" rather than dereference null.
+  if (!libomptarget_ompt_enabled)
+    return false;
+  if (!TCR_4(__kmp_init_middle)) {
+    __kmp_middle_initialize();
+  }
+  bool ret = false;
+  libomptarget_ompt_enabled->enabled = ompt_enabled.enabled;
+  if (ompt_enabled.enabled) {
+    ret = true;
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  libomptarget_ompt_enabled->event_name = ompt_target_enabled.event_name;
+
+    FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro)
+#undef ompt_event_macro
+  }
+  return ret;
+}
+
+// Placeholder wrappers that are installed in the EMI callback slots when a
+// tool registers a non-EMI target callback. They are empty for now and do not
+// forward to the callbacks stored in ompt_callbacks_noemi.
+void ompt_callback_target_data_op_emi_wrapper(
+    ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data,
+    ompt_data_t *target_data, ompt_id_t *host_op_id,
+    ompt_target_data_op_t optype, void *src_addr, int src_device_num,
+    void *dest_addr, int dest_device_num, size_t bytes,
+    const void *codeptr_ra) {}
+
+void ompt_callback_target_emi_wrapper(ompt_target_t kind,
+                                      ompt_scope_endpoint_t endpoint,
+                                      int device_num, ompt_data_t *task_data,
+                                      ompt_data_t *target_task_data,
+                                      ompt_data_t *target_data,
+                                      const void *codeptr_ra) {}
+
+void ompt_callback_target_map_emi_wrapper(ompt_data_t *target_data,
+                                          unsigned int nitems, void **host_addr,
+                                          void **device_addr, size_t *bytes,
+                                          unsigned int *mapping_flags,
+                                          const void *codeptr_ra) {}
+
+void ompt_callback_target_submit_emi_wrapper(ompt_scope_endpoint_t endpoint,
+                                             ompt_data_t *target_data,
+                                             ompt_id_t *host_op_id,
+                                             unsigned int requested_num_teams) {
+
+}
+
 #if KMP_OS_DARWIN
 
 // While Darwin supports weak symbols, the library that wishes to provide a new
@@ -571,7 +631,40 @@
   else \
     return ompt_set_always;
 
-    FOREACH_OMPT_EVENT(ompt_event_macro)
+    FOREACH_OMPT_HOST_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  // Device and EMI target events: stored in ompt_target_callbacks.
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  case event_name: \
+    ompt_target_callbacks.ompt_callback(event_name) = (callback_type)callback; \
+    ompt_target_enabled.event_name = (callback != 0); \
+    if (callback) \
+      return ompt_event_implementation_status(event_name); \
+    else \
+      return ompt_set_always;
+
+    FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  // Non-EMI target events: keep the tool's callback in ompt_callbacks_noemi
+  // and install the matching EMI wrapper in ompt_target_callbacks.
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  case event_name: \
+    ompt_callbacks_noemi.ompt_callback(event_name) = (callback_type)callback; \
+    ompt_target_enabled.ompt_emi_event(event_name) = (callback != 0); \
+    if (callback) { \
+      ompt_target_callbacks.ompt_emi_callback(event_name) = \
+          (ompt_emi_callback_type(event_name))(&ompt_emi_wrapper(event_name)); \
+      return ompt_event_implementation_status(event_name); \
+    } else { \
+      ompt_target_callbacks.ompt_emi_callback(event_name) = NULL; \
+      return ompt_set_always; \
+    }
+
+    FOREACH_OMPT_NOEMI_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
 
@@ -598,7 +691,60 @@
     return ompt_get_callback_failure; \
   }
 
-    FOREACH_OMPT_EVENT(ompt_event_macro)
+    FOREACH_OMPT_HOST_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  // Device events: report the callback stored in ompt_target_callbacks.
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  case event_name: { \
+    ompt_callback_t mycb = \
+        (ompt_callback_t)ompt_target_callbacks.ompt_callback(event_name); \
+    if (ompt_target_enabled.event_name && mycb) { \
+      *callback = mycb; \
+      return ompt_get_callback_success; \
+    } \
+    return ompt_get_callback_failure; \
+  }
+
+    FOREACH_OMPT_DEVICE_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  // EMI target events: a slot holding the wrapper means only the non-EMI
+  // callback was registered, so report failure for the EMI event.
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  case ompt_emi_event(event_name): { \
+    ompt_callback_t mycb = \
+        (ompt_callback_t)ompt_target_callbacks.ompt_emi_callback(event_name); \
+    if (ompt_target_enabled.ompt_emi_event(event_name) && \
+        mycb != (ompt_callback_t)(&ompt_emi_wrapper(event_name))) { \
+      *callback = mycb; \
+      return ompt_get_callback_success; \
+    } \
+    return ompt_get_callback_failure; \
+  }
+
+    FOREACH_OMPT_NOEMI_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  // Non-EMI target events: registered iff the EMI slot holds the wrapper.
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  case event_name: { \
+    ompt_callback_t mycb = \
+        (ompt_callback_t)ompt_callbacks_noemi.ompt_callback(event_name); \
+    ompt_callback_t wrapper = \
+        (ompt_callback_t)ompt_target_callbacks.ompt_emi_callback(event_name); \
+    if (ompt_target_enabled.ompt_emi_event(event_name) && \
+        wrapper == (ompt_callback_t)(&ompt_emi_wrapper(event_name))) { \
+      *callback = mycb; \
+      return ompt_get_callback_success; \
+    } \
+    return ompt_get_callback_failure; \
+  }
+
+    FOREACH_OMPT_NOEMI_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
 
diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h
--- a/openmp/runtime/src/ompt-internal.h
+++ b/openmp/runtime/src/ompt-internal.h
@@ -26,24 +26,64 @@
 
 #define ompt_callback(e) e##_callback
 
+#define ompt_emi_callback(e) e##_emi_callback
+
+#define ompt_emi_callback_type(e) e##_emi_t
+
+#define ompt_emi_wrapper(e) e##_emi_wrapper
+
+#define ompt_emi_event(e) e##_emi
+
+/* Struct to collect host callback pointers */
 typedef struct ompt_callbacks_internal_s {
 #define ompt_event_macro(event, callback, eventid) \
   callback ompt_callback(event);
 
-  FOREACH_OMPT_EVENT(ompt_event_macro)
+  FOREACH_OMPT_HOST_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
 } ompt_callbacks_internal_t;
 
+/* Struct to collect target callback pointers */
+typedef struct ompt_target_callbacks_internal_s {
+#define ompt_event_macro(event, callback, eventid) \
+  callback ompt_callback(event);
+
+  FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_target_callbacks_internal_t;
+
+/* Struct to collect noemi callback pointers */
+typedef struct ompt_callbacks_internal_noemi_s {
+#define ompt_event_macro(event, callback, eventid) \
+  callback ompt_callback(event);
+
+  FOREACH_OMPT_NOEMI_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_callbacks_internal_noemi_t;
+
+/* Bitmap to mark OpenMP 5.1 host events as registered */
 typedef struct ompt_callbacks_active_s {
   unsigned int enabled : 1;
 #define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
 
-  FOREACH_OMPT_EVENT(ompt_event_macro)
+  FOREACH_OMPT_HOST_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
 } ompt_callbacks_active_t;
 
+/* Bitmap to mark OpenMP 5.1 target events as registered */
+typedef struct ompt_target_callbacks_active_s {
+  unsigned int enabled : 1;
+#define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
+
+  FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_target_callbacks_active_t;
+
 #define TASK_TYPE_DETAILS_FORMAT(info) \
   ((info->td_flags.task_serial || info->td_flags.tasking_ser) \
        ? ompt_task_undeferred \
@@ -84,6 +124,8 @@
 } ompt_thread_info_t;
 
 extern ompt_callbacks_internal_t ompt_callbacks;
+extern ompt_target_callbacks_internal_t ompt_target_callbacks;
+extern ompt_callbacks_internal_noemi_t ompt_callbacks_noemi;
 
 #if OMPT_SUPPORT && OMPT_OPTIONAL
 #if USE_FAST_MEMORY
@@ -109,6 +151,7 @@
 int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg);
 
 extern ompt_callbacks_active_t ompt_enabled;
+extern ompt_target_callbacks_active_t ompt_target_enabled;
 
 #if KMP_OS_WINDOWS
 #define UNLIKELY(x) (x)