diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -63,8 +63,24 @@ add_definitions(-DOMPTARGET_DEBUG) endif() +# OMPT support for libomptarget +set(OMPT_TARGET_DEFAULT FALSE) +if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (NOT WIN32)) + set (OMPT_TARGET_DEFAULT TRUE) +endif() +set(LIBOMPTARGET_OMPT_SUPPORT ${OMPT_TARGET_DEFAULT} CACHE BOOL "OMPT-target-support?") +if (LIBOMPTARGET_OMPT_SUPPORT) + add_definitions(-DOMPT_SUPPORT=1) + message(STATUS "OMPT target enabled") +else() + message(STATUS "OMPT target disabled") +endif() + +pythonize_bool(LIBOMPTARGET_OMPT_SUPPORT) + set(LIBOMPTARGET_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) -include_directories(${LIBOMPTARGET_INCLUDE_DIR}) +set(LIBOMP_RUNTIME_SRC_BINARY_DIR ${CMAKE_BINARY_DIR}/openmp/runtime/src) +include_directories(${LIBOMPTARGET_INCLUDE_DIR} ${LIBOMP_RUNTIME_SRC_BINARY_DIR}) # Build target agnostic offloading library. set(LIBOMPTARGET_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) diff --git a/openmp/libomptarget/include/ompt-connector.h b/openmp/libomptarget/include/ompt-connector.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/ompt-connector.h @@ -0,0 +1,94 @@ +//=== ompt-connector.h - Target independent OpenMP target RTL -- C++ ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Support used by OMPT implementation to establish communication between +// various OpenMP runtime libraries: host openmp library, target-independent +// runtime library, and device-dependent runtime libraries. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPT_CONNECTOR_H
+#define _OMPT_CONNECTOR_H
+
+//****************************************************************************
+// global includes
+//****************************************************************************
+
+#include
+#include
+
+//****************************************************************************
+// local includes
+//****************************************************************************
+
+#include
+#include
+#include
+
+//****************************************************************************
+// macros
+//****************************************************************************
+
+// NOTE(review): `#s` stringifies its argument without macro-expanding it, so
+// stringify(LIBOMPTARGET_GET_TARGET_OPID) yields
+// "LIBOMPTARGET_GET_TARGET_OPID", not "libomptarget_get_target_opid".
+// Confirm that every lookup keyed by this string expects that spelling.
+#define stringify(s) #s
+
+#define LIBOMPTARGET_GET_TARGET_OPID libomptarget_get_target_opid
+
+//****************************************************************************
+// type declarations
+//****************************************************************************
+
+// Signature of the "<library>_ompt_connect" entry point resolved by
+// library_ompt_connector_t.
+typedef void (*library_ompt_connect_t)(ompt_start_tool_result_t *result);
+
+//----------------------------------------------------------------------------
+// class library_ompt_connector_t
+// purpose:
+//
+// establish connection between openmp runtime libraries
+//
+// NOTE: This class is intended for use in attribute constructors. Therefore,
+// it should be declared within the constructor function to ensure that
+// the class is initialized before its methods are used
+//----------------------------------------------------------------------------
+
+class library_ompt_connector_t {
+public:
+  // Record the name of the connect routine, "<library_name>_ompt_connect".
+  // The symbol itself is resolved lazily by the first connect() call.
+  library_ompt_connector_t(const char *library_name)
+      : is_initialized(false), library_ompt_connect(nullptr),
+        library_connect_routine(library_name) {
+    library_connect_routine.append("_ompt_connect");
+  }
+  library_ompt_connector_t() = delete;
+
+  // Resolve the connect routine on first use and, if it was found, call it
+  // with ompt_result. Does nothing when the symbol could not be resolved.
+  void connect(ompt_start_tool_result_t *ompt_result) {
+    initialize();
+    if (library_ompt_connect) {
+      library_ompt_connect(ompt_result);
+    }
+  }
+
+private:
+  // One-time dlsym() lookup of library_connect_routine.
+  void initialize() {
+    if (is_initialized)
+      return;
+    DP("OMPT: library_ompt_connect = %s\n", library_connect_routine.c_str());
+    void *vptr = dlsym(NULL, library_connect_routine.c_str());
+    // If dlsym fails, library_ompt_connect will be null. connect() checks
+    // for this condition
+    library_ompt_connect = reinterpret_cast<library_ompt_connect_t>(vptr);
+    // "%p" expects a void *, so print the object pointer returned by dlsym
+    // rather than the function pointer derived from it.
+    DP("OMPT: library_ompt_connect = %p\n", vptr);
+    is_initialized = true;
+  }
+
+private:
+  bool is_initialized;
+  library_ompt_connect_t library_ompt_connect;
+  std::string library_connect_routine;
+};
+
+#endif
diff --git a/openmp/libomptarget/include/ompt_device_callbacks.h b/openmp/libomptarget/include/ompt_device_callbacks.h
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/include/ompt_device_callbacks.h
@@ -0,0 +1,264 @@
+//=== ompt_device_callbacks.h - Target independent OpenMP target RTL -- C++
+//-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface used by both target-independent and device-dependent runtimes
+// to coordinate registration and invocation of OMPT callbacks
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPT_DEVICE_CALLBACKS_H
+#define _OMPT_DEVICE_CALLBACKS_H
+
+//****************************************************************************
+// local includes
+//****************************************************************************
+
+#include
+#include
+
+#include
+
+//****************************************************************************
+// macros
+//****************************************************************************
+#define FOREACH_OMPT_TARGET_CALLBACK(macro)                                    \
+  FOREACH_OMPT_DEVICE_EVENT(macro)                                             \
+  FOREACH_OMPT_NOEMI_EVENT(macro)                                              \
+  FOREACH_OMPT_EMI_EVENT(macro)
+
+/*****************************************************************************
+ * implementation specific types
+ *****************************************************************************/
+
+//****************************************************************************
+// types
+//****************************************************************************
+
+// Generator for host operation / target ids used by the non-EMI callbacks.
+typedef uint64_t (*id_interface_t)();
+
+// Per-device flag recording whether the device-initialize callback has been
+// delivered and not yet matched by a device-finalize callback.
+class ompt_device {
+public:
+  ompt_device() : enabled(false) {}
+
+  // Atomically flip the flag false -> true. Returns true only for the caller
+  // that performed the transition.
+  bool do_initialize() {
+    bool expected = false;
+    return enabled.compare_exchange_strong(expected, true);
+  }
+
+  // Atomically flip the flag true -> false. Returns true only for the caller
+  // that performed the transition.
+  bool do_finalize() {
+    bool expected = true;
+    return enabled.compare_exchange_strong(expected, false);
+  }
+
+private:
+  std::atomic<bool> enabled;
+};
+
+// Holds one function pointer per OMPT target callback and forwards events to
+// whichever callbacks were bound by register_callbacks(). The *_emi
+// dispatchers fall back to the corresponding non-EMI dispatcher when no EMI
+// callback is bound.
+class ompt_device_callbacks_t {
+public:
+  // Deliver device_initialize once per device: the callback runs only if this
+  // call flips the device's flag from "not initialized" to "initialized".
+  void ompt_callback_device_initialize(int device_num, const char *type) {
+    if (ompt_callback_device_initialize_fn) {
+      ompt_device *device = lookup_device(device_num);
+      if (device->do_initialize()) {
+        ompt_callback_device_initialize_fn(
+            device_num, type, reinterpret_cast<ompt_device_t *>(device),
+            lookup, documentation);
+      }
+    }
+  }
+
+  // Deliver device_finalize only for a device whose flag is currently set.
+  void ompt_callback_device_finalize(int device_num) {
+    if (ompt_callback_device_finalize_fn) {
+      ompt_device *device = lookup_device(device_num);
+      if (device->do_finalize()) {
+        ompt_callback_device_finalize_fn(device_num);
+      }
+    }
+  }
+
+  // Forward a device_load event if a callback is bound.
+  void ompt_callback_device_load(int device_num, const char *filename,
+                                 int64_t offset_in_file, void *vma_in_file,
+                                 size_t bytes, void *host_addr,
+                                 void *device_addr, uint64_t module_id) {
+    if (ompt_callback_device_load_fn) {
+      ompt_callback_device_load_fn(device_num, filename, offset_in_file,
+                                   vma_in_file, bytes, host_addr, device_addr,
+                                   module_id);
+    }
+  }
+
+  // Forward a device_unload event if a callback is bound.
+  void ompt_callback_device_unload(int device_num, uint64_t module_id) {
+    if (ompt_callback_device_unload_fn) {
+      ompt_callback_device_unload_fn(device_num, module_id);
+    }
+  }
+
+  // Dispatch a data operation. With an EMI callback bound, both endpoints are
+  // forwarded to it. Otherwise only ompt_scope_begin is forwarded, to the
+  // non-EMI dispatcher, using target_data->value as the target id.
+  void ompt_callback_target_data_op_emi(
+      ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data,
+      ompt_data_t *target_data, ompt_target_data_op_t optype, void *src_addr,
+      int src_device_num, void *dest_addr, int dest_device_num, size_t bytes,
+      const void *codeptr_ra, id_interface_t id_interface,
+      ompt_id_t *host_op_id) {
+    if (ompt_callback_target_data_op_emi_fn) {
+      ompt_callback_target_data_op_emi_fn(
+          endpoint, target_task_data, target_data, host_op_id, optype, src_addr,
+          src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra);
+    } else if (endpoint == ompt_scope_begin) {
+      ompt_callback_target_data_op(target_data->value, optype, src_addr,
+                                   src_device_num, dest_addr, dest_device_num,
+                                   bytes, codeptr_ra, id_interface, host_op_id);
+    }
+  }
+
+  // Non-EMI data operation: when a callback is bound, obtain a host op id
+  // from id_interface, store it in *host_op_id and invoke the callback.
+  void ompt_callback_target_data_op(ompt_id_t target_id,
+                                    ompt_target_data_op_t optype,
+                                    void *src_addr, int src_device_num,
+                                    void *dest_addr, int dest_device_num,
+                                    size_t bytes, const void *codeptr_ra,
+                                    id_interface_t id_interface,
+                                    ompt_id_t *host_op_id) {
+    if (ompt_callback_target_data_op_fn) {
+      *host_op_id = id_interface();
+      ompt_callback_target_data_op_fn(target_id, *host_op_id, optype, src_addr,
+                                      src_device_num, dest_addr,
+                                      dest_device_num, bytes, codeptr_ra);
+    }
+  }
+
+  // Dispatch a target region event to the EMI callback if bound, otherwise to
+  // the non-EMI dispatcher.
+  void ompt_callback_target_emi(ompt_target_t kind,
+                                ompt_scope_endpoint_t endpoint, int device_num,
+                                ompt_data_t *task_data,
+                                ompt_data_t *target_task_data,
+                                ompt_data_t *target_data,
+                                const void *codeptr_ra,
+                                id_interface_t id_interface) {
+    if (ompt_callback_target_emi_fn) {
+      ompt_callback_target_emi_fn(kind, endpoint, device_num, task_data,
+                                  target_task_data, target_data, codeptr_ra);
+    } else {
+      ompt_callback_target(kind, endpoint, device_num, task_data, codeptr_ra,
+                           target_data, id_interface);
+    }
+  }
+
+  void ompt_callback_target(ompt_target_t kind, ompt_scope_endpoint_t endpoint,
+                            int device_num, ompt_data_t *task_data,
+                            const void *codeptr_ra, ompt_data_t *target_data,
+                            id_interface_t id_interface) {
+    // if we reach this point, ompt_callback_target_emi was not
+    // invoked so a tool didn't provide a target id. thus, we must
+    // unconditionally get an id here. even if there is no
+    // ompt_callback_target, we need to have an id for use by other
+    // callbacks.
+    // note:
+    // on a scope_begin callback, id_interface will generate an id.
+    // on a scope_end callback, id_interface will return the existing
+    // id. it is safe to do the assignment again.
+    target_data->value = id_interface();
+    if (ompt_callback_target_fn) {
+      ompt_callback_target_fn(kind, endpoint, device_num, task_data,
+                              target_data->value, codeptr_ra);
+    }
+  }
+
+  // Dispatch a target map event to the EMI callback if bound, otherwise to
+  // the non-EMI dispatcher with target_data->value as the target id.
+  void ompt_callback_target_map_emi(ompt_data_t *target_data,
+                                    unsigned int nitems, void **host_addr,
+                                    void **device_addr, size_t *bytes,
+                                    unsigned int *mapping_flags,
+                                    const void *codeptr_ra) {
+    if (ompt_callback_target_map_emi_fn) {
+      ompt_callback_target_map_emi_fn(target_data, nitems, host_addr,
+                                      device_addr, bytes, mapping_flags,
+                                      codeptr_ra);
+    } else {
+      ompt_callback_target_map(target_data->value, nitems, host_addr,
+                               device_addr, bytes, mapping_flags, codeptr_ra);
+    }
+  }
+
+  // Forward a non-EMI target map event if a callback is bound.
+  void ompt_callback_target_map(ompt_id_t target_id, unsigned int nitems,
+                                void **host_addr, void **device_addr,
+                                size_t *bytes, unsigned int *mapping_flags,
+                                const void *codeptr_ra) {
+    if (ompt_callback_target_map_fn) {
+      ompt_callback_target_map_fn(target_id, nitems, host_addr, device_addr,
+                                  bytes, mapping_flags, codeptr_ra);
+    }
+  }
+
+  // Dispatch a kernel submit event. With an EMI callback bound, both endpoints
+  // are forwarded to it. Otherwise only ompt_scope_begin is forwarded, to the
+  // non-EMI dispatcher.
+  void ompt_callback_target_submit_emi(ompt_scope_endpoint_t endpoint,
+                                       ompt_data_t *target_data,
+                                       unsigned int requested_num_teams,
+                                       id_interface_t id_interface,
+                                       ompt_id_t *host_op_id) {
+    if (ompt_callback_target_submit_emi_fn) {
+      ompt_callback_target_submit_emi_fn(endpoint, target_data, host_op_id,
+                                         requested_num_teams);
+    } else if (endpoint == ompt_scope_begin) {
+      ompt_callback_target_submit(target_data->value, requested_num_teams,
+                                  id_interface, host_op_id);
+    }
+  }
+
+  // Non-EMI submit: when a callback is bound, obtain a host op id from
+  // id_interface, store it in *host_op_id and invoke the callback.
+  void ompt_callback_target_submit(ompt_id_t target_id,
+                                   unsigned int requested_num_teams,
+                                   id_interface_t id_interface,
+                                   ompt_id_t *host_op_id) {
+    if (ompt_callback_target_submit_fn) {
+      *host_op_id = id_interface();
+      ompt_callback_target_submit_fn(target_id, *host_op_id,
+                                     requested_num_teams);
+    }
+  }
+
+  // Mark the object disabled and clear every callback pointer.
+  void init() {
+    enabled = false;
+#define init_name(name, type, code) name##_fn = nullptr;
+    FOREACH_OMPT_TARGET_CALLBACK(init_name)
+#undef init_name
+  }
+
+  // True once register_callbacks() has run (and init() has not run since).
+  bool is_enabled() const { return enabled; }
+
+  // Forward the device count to the library-specific resize() routine.
+  void prepare_devices(int number_of_devices) { resize(number_of_devices); }
+
+  // Mark the object enabled and bind every callback pointer to whatever
+  // `lookup` returns for the callback's name.
+  void register_callbacks(ompt_function_lookup_t lookup) {
+    enabled = true;
+#define ompt_bind_callback(fn, type, code)                                     \
+  fn##_fn = (fn##_t)lookup(#fn);                                               \
+  DP("OMPT: class bound %s=%p\n", #fn, ((void *)(uint64_t)fn##_fn));
+    FOREACH_OMPT_TARGET_CALLBACK(ompt_bind_callback);
+#undef ompt_bind_callback
+  }
+
+  // Return the bound callback whose name equals interface_function_name, or
+  // null when the name matches no callback.
+  ompt_interface_fn_t lookup_callback(const char *interface_function_name) {
+#define ompt_dolookup(fn, type, code)                                          \
+  if (strcmp(interface_function_name, #fn) == 0)                               \
+    return (ompt_interface_fn_t)fn##_fn;
+
+    FOREACH_OMPT_TARGET_CALLBACK(ompt_dolookup);
+#undef ompt_dolookup
+
+    return nullptr;
+  }
+
+  // Lookup function handed to the tool in device_initialize; each library
+  // that includes this header supplies its own definition.
+  static ompt_interface_fn_t lookup(const char *interface_function_name);
+
+private:
+  bool enabled;
+
+#define declare_name(name, type, code) name##_t name##_fn;
+  FOREACH_OMPT_TARGET_CALLBACK(declare_name)
+#undef declare_name
+
+  // Defined by the including library, as is `lookup` above.
+  static void resize(int number_of_devices);
+  static ompt_device *lookup_device(int device_num);
+  static const char *documentation;
+};
+
+extern ompt_device_callbacks_t ompt_device_callbacks;
+
+#endif
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -348,7 +348,8 @@ int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); -void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); +void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, + uint64_t loop_tripcount); void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, uint64_t loop_tripcount); diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ 
-75,6 +75,7 @@ impl/system.cpp impl/msgpack.cpp src/rtl.cpp + src/ompt_callback.cpp ${LIBOMPTARGET_EXTRA_SOURCE} ) diff --git a/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp @@ -0,0 +1,132 @@ +//===------ ompt_callback.cpp - Target RTLs Implementation -------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// OMPT support for AMDGPU +// +//===----------------------------------------------------------------------===// + +//**************************************************************************** +// global includes +//**************************************************************************** + +#include +#include + +#include + +//**************************************************************************** +// debug macro needed by include files +//**************************************************************************** + +#ifndef DEBUG_PREFIX +#define DEBUG_PREFIX "Target AMDGPU RTL" +#endif + +//**************************************************************************** +// local includes +//**************************************************************************** + +#include +#include +#include + +//**************************************************************************** +// macros +//**************************************************************************** + +#define FOREACH_TARGET_FN(macro) + +#define fnptr_to_ptr(x) ((void *)(uint64_t)x) + +#define ompt_ptr_unknown ((void *)0) + +//**************************************************************************** +// global data 
+//****************************************************************************
+
+ompt_device_callbacks_t ompt_device_callbacks;
+
+//****************************************************************************
+// private data
+//****************************************************************************
+
+static bool ompt_enabled = false;
+
+static ompt_get_target_info_t LIBOMPTARGET_GET_TARGET_OPID;
+
+const char *ompt_device_callbacks_t::documentation = nullptr;
+
+// Per-device state array; allocated by resize(), indexed by device number.
+static ompt_device *devices = nullptr;
+
+//****************************************************************************
+// private operations
+//****************************************************************************
+
+// Allocate a fresh array with one ompt_device per device. Any array from a
+// previous call is not released.
+void ompt_device_callbacks_t::resize(int number_of_devices) {
+  devices = new ompt_device[number_of_devices];
+}
+
+// Return the state slot for device_num; the index is not range-checked.
+ompt_device *ompt_device_callbacks_t::lookup_device(int device_num) {
+  return devices + device_num;
+}
+
+// Map an interface function name to its implementation. FOREACH_TARGET_FN
+// currently lists no functions, so every name resolves to null.
+ompt_interface_fn_t
+ompt_device_callbacks_t::lookup(const char *interface_function_name) {
+#define macro(fn)                                                              \
+  if (strcmp(interface_function_name, #fn) == 0)                               \
+    return (ompt_interface_fn_t)fn;
+
+  FOREACH_TARGET_FN(macro);
+
+#undef macro
+
+  return (ompt_interface_fn_t) nullptr;
+}
+
+// Initializer handed to libomptarget through ompt_start_tool_result_t: marks
+// OMPT enabled, resolves the op-id entry point and binds the tool callbacks
+// through `lookup`. Always returns 0.
+static int ompt_device_init(ompt_function_lookup_t lookup,
+                            int initial_device_num, ompt_data_t *tool_data) {
+  DP("OMPT: Enter ompt_device_init\n");
+
+  ompt_enabled = true;
+
+  LIBOMPTARGET_GET_TARGET_OPID =
+      (ompt_get_target_info_t)lookup(stringify(LIBOMPTARGET_GET_TARGET_OPID));
+
+  DP("OMPT: libomptarget_get_target_info = %p\n",
+     fnptr_to_ptr(LIBOMPTARGET_GET_TARGET_OPID));
+
+  ompt_device_callbacks.register_callbacks(lookup);
+
+  DP("OMPT: Exit ompt_device_init\n");
+
+  return 0;
+}
+
+// Finalizer handed to libomptarget; it only emits a debug message.
+static void ompt_device_fini(ompt_data_t *tool_data) {
+  DP("OMPT: executing amdgpu_ompt_device_fini\n");
+}
+
+//****************************************************************************
+// constructor
+//****************************************************************************
+
+// Load-time hook: reset the callback table, then offer this plugin's
+// initialize/finalize pair to libomptarget through its connect routine.
+__attribute__((constructor)) static void ompt_init(void) {
+  DP("OMPT: Entering ompt_init\n");
+  static library_ompt_connector_t libomptarget_connector("libomptarget");
+  static ompt_start_tool_result_t ompt_result;
+
+  ompt_result.initialize = ompt_device_init;
+  ompt_result.finalize = ompt_device_fini;
+  ompt_result.tool_data.value = 0;
+
+  ompt_device_callbacks.init();
+
+  libomptarget_connector.connect(&ompt_result);
+  DP("OMPT: Exiting ompt_init\n");
+}
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -38,6 +38,18 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" +#ifdef OMPT_SUPPORT +#include +#define OMPT_IF_ENABLED(stmts) \ + do { \ + if (ompt_device_callbacks.is_enabled()) { \ + stmts \ + } \ + } while (0) +#else +#define OMPT_IF_ENABLED(stmts) +#endif + // hostrpc interface, FIXME: consider moving to its own include these are // statically linked into amdgpu/plugin if present from hostrpc_services.a, // linked as --whole-archive to override the weak symbols that are used to @@ -777,6 +789,13 @@ return; } +#ifdef OMPT_SUPPORT + // TODO ompt_device_callbacks.enabled is not yet set since + // register_callbacks on the plugin instance is not yet + // called. Hence, unconditionally prepare devices. 
+ ompt_device_callbacks.prepare_devices(NumberOfDevices); +#endif + for (int i = 0; i < NumberOfDevices; i++) { uint32_t queue_size = 0; { @@ -828,6 +847,11 @@ // Then none of these can have been set up and they can't be torn down return; } + + OMPT_IF_ENABLED(for (int i = 0; i < NumberOfDevices; i++) { + ompt_device_callbacks.ompt_callback_device_finalize(i); + }); + // Run destructors on types that use HSA before // impl_finalize removes access to it deviceStateStore.clear(); @@ -1807,6 +1831,11 @@ DeviceInfo.GroupsPerDevice[device_id] * DeviceInfo.ThreadsPerGroup[device_id]); + OMPT_IF_ENABLED( + std::string ompt_gpu_type("AMD "); ompt_gpu_type += GetInfoName; + const char *type = ompt_gpu_type.c_str(); + ompt_device_callbacks.ompt_callback_device_initialize(device_id, type);); + return OFFLOAD_SUCCESS; } @@ -1888,7 +1917,6 @@ return NULL; } - err = env.after_loading(); if (err != HSA_STATUS_SUCCESS) { return NULL; @@ -1897,6 +1925,14 @@ DP("AMDGPU module successfully loaded!\n"); + OMPT_IF_ENABLED(const char *filename = nullptr; int64_t offset_in_file = 0; + void *vma_in_file = 0; size_t bytes = img_size; + void *host_addr = image->ImageStart; void *device_addr = 0; + uint64_t module_id = 0; // FIXME + ompt_device_callbacks.ompt_callback_device_load( + device_id, filename, offset_in_file, vma_in_file, bytes, + host_addr, device_addr, module_id);); + { // the device_State array is either large value in bss or a void* that // needs to be assigned to a pointer to an array of size device_state_bytes diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -16,6 +16,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}/api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ompt_callback.cpp ${CMAKE_CURRENT_SOURCE_DIR}/rtl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp ) diff --git 
a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -19,6 +19,18 @@ #include #include +#ifdef OMPT_SUPPORT +#include "ompt_callback.h" +#define OMPT_IF_ENABLED(stmts) \ + do { \ + if (ompt_enabled) { \ + stmts \ + } \ + } while (0) +#else +#define OMPT_IF_ENABLED(stmts) +#endif + DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), @@ -431,11 +443,36 @@ } void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { - return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_alloc_begin(RTLDeviceID, HstPtr, Size, + codeptr);); + + void *tgt_ptr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); + + OMPT_IF_ENABLED( + ompt_interface.target_data_alloc_end(RTLDeviceID, HstPtr, Size, codeptr); + ompt_interface.ompt_state_clear();); + return tgt_ptr; } int32_t DeviceTy::deleteData(void *TgtPtrBegin) { - return RTL->data_delete(RTLDeviceID, TgtPtrBegin); + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_delete_begin(RTLDeviceID, TgtPtrBegin, + codeptr);); + + int32_t status = RTL->data_delete(RTLDeviceID, TgtPtrBegin); + + OMPT_IF_ENABLED( + ompt_interface.target_data_delete_end(RTLDeviceID, TgtPtrBegin, codeptr); + ompt_interface.ompt_state_clear();); + + return status; } // Submit data to device @@ -453,11 +490,24 @@ : "unknown"); } + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + 
ompt_interface.target_data_submit_begin(RTLDeviceID, TgtPtrBegin, + HstPtrBegin, Size, codeptr);); + + int32_t status; if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) - return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); + status = RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); else - return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, - AsyncInfo); + status = RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, + AsyncInfo); + + OMPT_IF_ENABLED(ompt_interface.target_data_submit_end( + RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, codeptr); + ompt_interface.ompt_state_clear();); + return status; } // Retrieve data from device @@ -474,11 +524,25 @@ : "unknown"); } + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_retrieve_begin(RTLDeviceID, HstPtrBegin, + TgtPtrBegin, Size, codeptr);); + + int32_t status; if (!RTL->data_retrieve_async || !RTL->synchronize) - return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); + status = RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); else - return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, - AsyncInfo); + status = RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, + Size, AsyncInfo); + + OMPT_IF_ENABLED(ompt_interface.target_data_retrieve_end( + RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, codeptr); + ompt_interface.ompt_state_clear();); + + return status; } // Copy data from current device to destination device directly diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -43,6 +43,7 @@ llvm_omp_get_dynamic_shared; __tgt_set_info_flag; __tgt_print_device_info; + libomptarget_ompt_connect; local: *; }; diff --git 
a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "device.h" #include "omptarget.h" #include "private.h" @@ -21,6 +23,18 @@ #include #include +#ifdef OMPT_SUPPORT +#include "ompt_callback.h" +#define OMPT_IF_ENABLED(stmts) \ + do { \ + if (ompt_enabled) { \ + stmts \ + } \ + } while (0) +#else +#define OMPT_IF_ENABLED(stmts) +#endif + //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t flags) { @@ -93,6 +107,13 @@ TIMESCOPE_WITH_IDENT(loc); DP("Entering data begin region for device %" PRId64 " with %d mappings\n", device_id, arg_num); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_enter_begin(device_id, codeptr);); + if (checkDeviceAndCtors(device_id, loc)) { DP("Not offloading to device %" PRId64 "\n", device_id); return; @@ -118,6 +139,9 @@ if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_data_enter_end(device_id, codeptr); + ompt_interface.ompt_state_clear();); } EXTERN void __tgt_target_data_begin_nowait_mapper( @@ -182,11 +206,21 @@ #endif AsyncInfoTy AsyncInfo(Device); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_exit_begin(device_id, codeptr);); + int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, arg_types, arg_names, arg_mappers, AsyncInfo); if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == 
OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_data_exit_end(device_id, codeptr); + ompt_interface.ompt_state_clear();); } EXTERN void __tgt_target_data_end_nowait_mapper( @@ -226,6 +260,13 @@ void **arg_mappers) { TIMESCOPE_WITH_IDENT(loc); DP("Entering data update with %d mappings\n", arg_num); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_update_begin(device_id, codeptr);); + if (checkDeviceAndCtors(device_id, loc)) { DP("Not offloading to device %" PRId64 "\n", device_id); return; @@ -242,6 +283,9 @@ if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_update_end(device_id, codeptr); + ompt_interface.ompt_state_clear();); } EXTERN void __tgt_target_data_update_nowait_mapper( @@ -301,12 +345,23 @@ DeviceTy &Device = *PM->Devices[device_id]; AsyncInfoTy AsyncInfo(Device); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_begin(device_id, codeptr);); + int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, AsyncInfo); if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_end(device_id, codeptr); + ompt_interface.ompt_state_clear();); + assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); return OMP_TGT_SUCCESS; } @@ -375,12 +430,23 @@ DeviceTy &Device = *PM->Devices[device_id]; AsyncInfoTy AsyncInfo(Device); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + 
+ ompt_interface.target_begin(device_id, codeptr);); + int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, arg_names, arg_mappers, team_num, thread_limit, true /*team*/, AsyncInfo); if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_end(device_id, codeptr); + ompt_interface.ompt_state_clear();); + assert(rc == OFFLOAD_SUCCESS && "__tgt_target_teams_mapper unexpected failure!"); return OMP_TGT_SUCCESS; @@ -424,9 +490,9 @@ MapComponentInfoTy(base, begin, size, type, name)); } -EXTERN void __kmpc_push_target_tripcount(int64_t device_id, +EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, uint64_t loop_tripcount) { - __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); + __kmpc_push_target_tripcount_mapper(loc, device_id, loop_tripcount); } EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, diff --git a/openmp/libomptarget/src/ompt_callback.h b/openmp/libomptarget/src/ompt_callback.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/ompt_callback.h @@ -0,0 +1,112 @@ +//===------- ompt_callback.h - Target independent OpenMP target RTL -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations for OpenMP Tool callback dispatchers
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPTARGET_CALLBACK_H
+#define _OMPTARGET_CALLBACK_H
+
+// The frame address is obtained the same way on every architecture; only the
+// frame-position flag reported alongside it differs.
+#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level)
+
+#if defined(__PPC64__) || defined(__arm__)
+#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_cfa
+#else
+#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_framepointer
+#endif
+
+#define OMPT_FRAME_FLAGS (ompt_frame_runtime | OMPT_FRAME_POSITION_DEFAULT)
+
+#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
+
+#include
+
+// Dispatcher interface used by libomptarget to report target regions and
+// target data operations. One instance exists per thread (see the
+// thread_local declaration of ompt_interface below). Only declarations live
+// here; the member definitions are not part of this header.
+class OmptInterface {
+public:
+  OmptInterface()
+      : _enter_frame(nullptr), _codeptr_ra(nullptr), _state(ompt_state_idle) {}
+
+  // Record / clear the frame and return address of the runtime entry point.
+  // Visible call sites pass OMPT_GET_FRAME_ADDRESS(0) and
+  // OMPT_GET_RETURN_ADDRESS(0) to ompt_state_set().
+  void ompt_state_set(void *enter_frame, void *codeptr_ra);
+
+  void ompt_state_clear();
+
+  // target op callbacks
+  //
+  // NOTE(review): the visible allocation call site passes the host pointer in
+  // the position named TgtPtrBegin here, and the visible data-submit call
+  // site passes (TgtPtrBegin, HstPtrBegin) where these declarations name
+  // (HstPtrBegin, TgtPtrBegin). Confirm against the definitions which order
+  // is intended.
+  void target_data_alloc_begin(int64_t device_id, void *TgtPtrBegin,
+                               size_t Size, void *codeptr);
+
+  void target_data_alloc_end(int64_t device_id, void *TgtPtrBegin, size_t Size,
+                             void *codeptr);
+
+  void target_data_submit_begin(int64_t device_id, void *HstPtrBegin,
+                                void *TgtPtrBegin, size_t Size, void *codeptr);
+
+  void target_data_submit_end(int64_t device_id, void *HstPtrBegin,
+                              void *TgtPtrBegin, size_t Size, void *codeptr);
+
+  void target_data_delete_begin(int64_t device_id, void *TgtPtrBegin,
+                                void *codeptr);
+
+  void target_data_delete_end(int64_t device_id, void *TgtPtrBegin,
+                              void *codeptr);
+
+  void target_data_retrieve_begin(int64_t device_id, void *HstPtrBegin,
+                                  void *TgtPtrBegin, size_t Size,
+                                  void *codeptr);
+
+  void target_data_retrieve_end(int64_t device_id, void *HstPtrBegin,
+                                void *TgtPtrBegin, size_t Size, void *codeptr);
+
+  void target_submit_begin(unsigned int num_teams = 1);
+
+  void target_submit_end(unsigned int num_teams = 1);
+
+  // target region callbacks
+  void target_data_enter_begin(int64_t device_id, void *codeptr);
+
+  void target_data_enter_end(int64_t device_id, void *codeptr);
+
+  void target_data_exit_begin(int64_t device_id, void *codeptr);
+
+  void target_data_exit_end(int64_t device_id, void *codeptr);
+
+  void target_update_begin(int64_t device_id, void *codeptr);
+
+  void target_update_end(int64_t device_id, void *codeptr);
+
+  void target_begin(int64_t device_id, void *codeptr);
+
+  void target_end(int64_t device_id, void *codeptr);
+
+private:
+  void ompt_state_set_helper(void *enter_frame, void *codeptr_ra, int flags,
+                             int state);
+
+  // begin/end target op marks
+  void target_operation_begin();
+
+  void target_operation_end();
+
+  // begin/end target region marks
+  void target_region_begin();
+
+  void target_region_end();
+
+  void target_region_announce(const char *name);
+
+private:
+  void *_enter_frame;
+  void *_codeptr_ra;
+  int _state;
+};
+
+extern thread_local OmptInterface ompt_interface;
+
+extern bool ompt_enabled;
+
+#endif
diff --git a/openmp/libomptarget/src/ompt_callback.cpp b/openmp/libomptarget/src/ompt_callback.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/src/ompt_callback.cpp
@@ -0,0 +1,421 @@
+//===-- ompt_callback.cpp - Target independent OpenMP target RTL -- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of OMPT callback interfaces for target independent layer +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +//**************************************************************************** +// local include files +//**************************************************************************** + +#include + +#include "ompt_callback.h" +#include "private.h" + +#include +#include + +/******************************************************************************* + * macros + *******************************************************************************/ + +#define OMPT_CALLBACK_AVAILABLE(fn) (ompt_enabled && fn) +#define OMPT_CALLBACK(fn, args) ompt_device_callbacks.fn args +#define fnptr_to_ptr(x) ((void *)(uint64_t)x) + +/******************************************************************************* + * type declarations + *******************************************************************************/ + +class libomptarget_rtl_finalizer_t { +public: + libomptarget_rtl_finalizer_t() : fn(0){}; + + void register_rtl(ompt_finalize_t _fn) { + assert(fn == 0); + fn = _fn; + }; + + void finalize() { + if (fn) + fn(NULL); + fn = 0; + }; + + ompt_finalize_t fn; +}; + +typedef int (*ompt_set_frame_enter_t)(void *addr, int flags, int state); + +typedef ompt_data_t *(*ompt_get_task_data_t)(); +typedef ompt_data_t *(*ompt_get_target_task_data_t)(); + +/***************************************************************************** + * global data + *****************************************************************************/ + +bool ompt_enabled = false; + +ompt_device_callbacks_t ompt_device_callbacks; + +/***************************************************************************** + * private data + 
*****************************************************************************/ + +static ompt_set_frame_enter_t ompt_set_frame_enter_fn = 0; +static ompt_get_task_data_t ompt_get_task_data_fn = 0; +static ompt_get_target_task_data_t ompt_get_target_task_data_fn = 0; + +static libomptarget_rtl_finalizer_t libomptarget_rtl_finalizer; + +const char *ompt_device_callbacks_t::documentation = 0; + +/***************************************************************************** + * Thread local data + *****************************************************************************/ + +thread_local OmptInterface ompt_interface; + +static thread_local uint64_t ompt_target_region_opid = 1; +static thread_local ompt_data_t ompt_target_data = ompt_data_none; +static thread_local ompt_data_t *ompt_task_data = 0; +static thread_local ompt_data_t *ompt_target_task_data = 0; +static thread_local ompt_id_t host_op_id = 0; + +static std::atomic<uint64_t> unique_id_ticket(1); + +/***************************************************************************** + * OMPT callbacks + *****************************************************************************/ + +void OmptInterface::ompt_state_set_helper(void *enter_frame, void *codeptr_ra, + int flags, int state) { + _enter_frame = enter_frame; + _codeptr_ra = codeptr_ra; + if (ompt_set_frame_enter_fn) { + _state = ompt_set_frame_enter_fn(_enter_frame, flags, state); + } + + return; +} + +void OmptInterface::ompt_state_set(void *enter_frame, void *codeptr_ra) { + ompt_state_set_helper(enter_frame, codeptr_ra, OMPT_FRAME_FLAGS, + ompt_state_work_parallel); +} + +void OmptInterface::ompt_state_clear(void) { + ompt_state_set_helper(0, 0, 0, _state); +} + +/***************************************************************************** + * OMPT private operations + *****************************************************************************/ + +static uint64_t id_create() { return unique_id_ticket.fetch_add(1); } + +static uint64_t opid_create() { + 
host_op_id = id_create(); + return host_op_id; +} + +static uint64_t opid_get() { return host_op_id; } + +static uint64_t regionid_create() { + ompt_target_data.value = id_create(); + return ompt_target_data.value; +} + +static uint64_t regionid_get() { return ompt_target_data.value; } + +void OmptInterface::target_region_begin() { + // set up task region state + ompt_task_data = ompt_get_task_data_fn(); + ompt_target_task_data = ompt_get_target_task_data_fn(); + + *ompt_task_data = ompt_data_none; + *ompt_target_task_data = ompt_data_none; + ompt_target_data = ompt_data_none; +} + +void OmptInterface::target_region_announce(const char *name) { + DP("in OmptInterface::target_region_%s target_id=%lu\n", name, + ompt_target_data.value); +} + +void OmptInterface::target_region_end() { + ompt_task_data = 0; + ompt_target_task_data = 0; + ompt_target_data = ompt_data_none; +} + +void OmptInterface::target_operation_begin() { + DP("in OmptInterface::target_operation_begin target_id=%lu\n", + ompt_target_data.value); +} + +void OmptInterface::target_operation_end() { + DP("in OmptInterface::target_operation_end target_id=%lu\n", + ompt_target_data.value); +} + +/***************************************************************************** + * OMPT public operations + *****************************************************************************/ + +// FIXME: optional implementation of target map? 
+ +void OmptInterface::target_data_alloc_begin(int64_t device_id, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_alloc, hst_ptr_begin, device_id, NULL, 0, size, codeptr, + opid_create, &ompt_target_region_opid); + target_operation_begin(); +} + +void OmptInterface::target_data_alloc_end(int64_t device_id, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_alloc, hst_ptr_begin, device_id, NULL, 0, size, codeptr, + opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_data_submit_begin(int64_t device_id, + void *tgt_ptr_begin, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_to_device, hst_ptr_begin, 0, tgt_ptr_begin, + device_id, size, codeptr, opid_create, &ompt_target_region_opid); + target_operation_begin(); +} + +void OmptInterface::target_data_submit_end(int64_t device_id, + void *tgt_ptr_begin, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_to_device, hst_ptr_begin, 0, tgt_ptr_begin, + device_id, size, codeptr, opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_data_delete_begin(int64_t device_id, + void *tgt_ptr_begin, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_delete, tgt_ptr_begin, device_id, NULL, 0, 0, codeptr, + opid_create, &ompt_target_region_opid); + 
target_operation_begin(); +} + +void OmptInterface::target_data_delete_end(int64_t device_id, + void *tgt_ptr_begin, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_delete, tgt_ptr_begin, device_id, NULL, 0, 0, codeptr, + opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_data_retrieve_begin(int64_t device_id, + void *hst_ptr_begin, + void *tgt_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_from_device, tgt_ptr_begin, device_id, + hst_ptr_begin, 0, size, codeptr, opid_create, &ompt_target_region_opid); + target_operation_begin(); +} + +void OmptInterface::target_data_retrieve_end(int64_t device_id, + void *hst_ptr_begin, + void *tgt_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_from_device, tgt_ptr_begin, device_id, + hst_ptr_begin, 0, size, codeptr, opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_submit_begin(unsigned int num_teams) { + ompt_device_callbacks.ompt_callback_target_submit_emi( + ompt_scope_begin, &ompt_target_data, num_teams, opid_create, + &ompt_target_region_opid); +} + +void OmptInterface::target_submit_end(unsigned int num_teams) { + ompt_device_callbacks.ompt_callback_target_submit_emi( + ompt_scope_end, &ompt_target_data, num_teams, opid_get, + &ompt_target_region_opid); +} + +void OmptInterface::target_data_enter_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_enter_data, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); 
+} + +void OmptInterface::target_data_enter_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_enter_data, ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +void OmptInterface::target_data_exit_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_exit_data, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); + target_region_announce("begin"); +} + +void OmptInterface::target_data_exit_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_exit_data, ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +void OmptInterface::target_update_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_update, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); + target_region_announce("begin"); +} + +void OmptInterface::target_update_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_update, ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +void OmptInterface::target_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); + target_region_announce("begin"); +} + +void OmptInterface::target_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target, 
ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +/***************************************************************************** + * OMPT interface operations + *****************************************************************************/ + +static void LIBOMPTARGET_GET_TARGET_OPID(uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id) { + *host_op_id = ompt_target_region_opid; +} + +static int libomptarget_ompt_initialize(ompt_function_lookup_t lookup, + int initial_device_num, + ompt_data_t *tool_data) { + DP("enter libomptarget_ompt_initialize!\n"); + + ompt_enabled = true; + +#define ompt_bind_name(fn) \ + fn##_fn = (fn##_t)lookup(#fn); \ + DP("%s=%p\n", #fn, fnptr_to_ptr(fn##_fn)); + + ompt_bind_name(ompt_set_frame_enter); + ompt_bind_name(ompt_get_task_data); + ompt_bind_name(ompt_get_target_task_data); + +#undef ompt_bind_name + + ompt_device_callbacks.register_callbacks(lookup); + + DP("exit libomptarget_ompt_initialize!\n"); + + return 0; +} + +static void libomptarget_ompt_finalize(ompt_data_t *data) { + DP("enter libomptarget_ompt_finalize!\n"); + + libomptarget_rtl_finalizer.finalize(); + + ompt_enabled = false; + + DP("exit libomptarget_ompt_finalize!\n"); +} + +// Today, this is not called from libomptarget +ompt_device *ompt_device_callbacks_t::lookup_device(int device_num) { + assert(0 && "Lookup device should be invoked in the plugin"); + return nullptr; +} + +ompt_interface_fn_t +ompt_device_callbacks_t::lookup(const char *interface_function_name) { + if (strcmp(interface_function_name, + stringify(LIBOMPTARGET_GET_TARGET_OPID)) == 0) + return (ompt_interface_fn_t)LIBOMPTARGET_GET_TARGET_OPID; + + return ompt_device_callbacks.lookup_callback(interface_function_name); +} + +/***************************************************************************** + * constructor + 
*****************************************************************************/ + +__attribute__((constructor(102))) static void ompt_init(void) { + static library_ompt_connector_t libomp_connector("libomp"); + static ompt_start_tool_result_t ompt_result; + + ompt_result.initialize = libomptarget_ompt_initialize; + ompt_result.finalize = libomptarget_ompt_finalize; + ompt_result.tool_data.value = 0; + + ompt_device_callbacks.init(); + + libomp_connector.connect(&ompt_result); + DP("OMPT: Exit ompt_init\n"); +} + +extern "C" { + +void libomptarget_ompt_connect(ompt_start_tool_result_t *result) { + DP("OMPT: Enter libomptarget_ompt_connect\n"); + if (ompt_enabled && result) { + libomptarget_rtl_finalizer.register_rtl(result->finalize); + result->initialize(ompt_device_callbacks_t::lookup, 0, NULL); + } + DP("OMPT: Leave libomptarget_ompt_connect\n"); +} +} diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -19,6 +19,18 @@ #include #include +#ifdef OMPT_SUPPORT +#include "ompt_callback.h" +#define OMPT_IF_ENABLED(stmts) \ + do { \ + if (ompt_enabled) { \ + stmts \ + } \ + } while (0) +#else +#define OMPT_IF_ENABLED(stmts) +#endif + int AsyncInfoTy::synchronize() { int Result = OFFLOAD_SUCCESS; if (AsyncInfo.Queue) { @@ -1455,6 +1467,10 @@ DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index); + OMPT_IF_ENABLED(ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), + OMPT_GET_RETURN_ADDRESS(0)); + ompt_interface.target_submit_begin(TeamNum);); + { TIMESCOPE_WITH_NAME_AND_IDENT( IsTeamConstruct ? 
"runTargetTeamRegion" : "runTargetRegion", loc); @@ -1472,6 +1488,9 @@ return OFFLOAD_FAIL; } + OMPT_IF_ENABLED(ompt_interface.target_submit_end(TeamNum); + ompt_interface.ompt_state_clear();); + if (ArgNum) { // Transfer data back and deallocate target memory for (first-)private // variables diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg --- a/openmp/libomptarget/test/lit.cfg +++ b/openmp/libomptarget/test/lit.cfg @@ -64,6 +64,9 @@ if config.libomptarget_debug: config.available_features.add('libomptarget-debug') +if config.has_libomptarget_ompt: + config.available_features.add('ompt') + config.available_features.add(config.libomptarget_current_target) # Determine whether the test system supports unified memory. diff --git a/openmp/libomptarget/test/lit.site.cfg.in b/openmp/libomptarget/test/lit.site.cfg.in --- a/openmp/libomptarget/test/lit.site.cfg.in +++ b/openmp/libomptarget/test/lit.site.cfg.in @@ -18,6 +18,7 @@ config.libomptarget_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" config.libomptarget_not = "@OPENMP_NOT_EXECUTABLE@" config.libomptarget_debug = @LIBOMPTARGET_DEBUG@ +config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@ # Let the main config do the real work. 
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/callbacks.h @@ -0,0 +1,126 @@ +#include + +// Tool related code below +#include + +// For EMI callbacks +ompt_id_t next_op_id = 0x8000000000000001; + +// OMPT callbacks + +// Synchronous callbacks +static void on_ompt_callback_device_initialize(int device_num, const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation) { + printf("Callback Init: device_num=%d type=%s device=%p lookup=%p doc=%p\n", + device_num, type, device, lookup, documentation); +} + +static void on_ompt_callback_device_finalize(int device_num) { + printf("Callback Fini: device_num=%d\n", device_num); +} + +static void on_ompt_callback_device_load(int device_num, const char *filename, + int64_t offset_in_file, + void *vma_in_file, size_t bytes, + void *host_addr, void *device_addr, + uint64_t module_id) { + printf("Callback Load: device_num:%d module_id:%lu filename:%s host_adddr:%p " + "device_addr:%p bytes:%lu\n", + device_num, module_id, filename, host_addr, device_addr, bytes); +} + +static void on_ompt_callback_target_data_op( + ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, + void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, + size_t bytes, const void *codeptr_ra) { + assert(codeptr_ra != 0); + // At least one of src and dest must be non-null + assert(src_addr != 0 || dest_addr != 0); + printf(" Callback DataOp: target_id=%lu host_op_id=%lu optype=%d src=%p " + "src_device_num=%d " + "dest=%p dest_device_num=%d bytes=%lu code=%p\n", + target_id, host_op_id, optype, src_addr, src_device_num, dest_addr, + dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t 
*task_data, + ompt_id_t target_id, + const void *codeptr_ra) { + assert(codeptr_ra != 0); + printf("Callback Target: target_id=%lu kind=%d endpoint=%d device_num=%d " + "code=%p\n", + target_id, kind, endpoint, device_num, codeptr_ra); +} + +static void on_ompt_callback_target_submit(ompt_id_t target_id, + ompt_id_t host_op_id, + unsigned int requested_num_teams) { + printf(" Callback Submit: target_id=%lu host_op_id=%lu req_num_teams=%d\n", + target_id, host_op_id, requested_num_teams); +} + +static void on_ompt_callback_target_map(ompt_id_t target_id, + unsigned int nitems, void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map callback is unimplemented"); +} + +static void on_ompt_callback_target_data_op_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data, + ompt_data_t *target_data, ompt_id_t *host_op_id, + ompt_target_data_op_t optype, void *src_addr, int src_device_num, + void *dest_addr, int dest_device_num, size_t bytes, + const void *codeptr_ra) { + assert(codeptr_ra != 0); + // At least one of src and dest must be non-null + assert(src_addr != 0 || dest_addr != 0); + if (endpoint == ompt_scope_begin) + *host_op_id = next_op_id++; + printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p " + "(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p " + "src_device_num=%d " + "dest=%p dest_device_num=%d bytes=%lu code=%p\n", + endpoint, optype, target_task_data, target_task_data->value, + target_data, target_data->value, host_op_id, *host_op_id, src_addr, + src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target_emi(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_data_t *target_task_data, + ompt_data_t *target_data, + const void *codeptr_ra) { + assert(codeptr_ra != 0); + if (endpoint == ompt_scope_begin) + target_data->value = next_op_id++; + 
printf("Callback Target EMI: kind=%d endpoint=%d device_num=%d task_data=%p " + "(0x%lx) target_task_data=%p (0x%lx) target_data=%p (0x%lx) code=%p\n", + kind, endpoint, device_num, task_data, task_data->value, + target_task_data, target_task_data->value, target_data, + target_data->value, codeptr_ra); +} + +static void on_ompt_callback_target_submit_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_data, + ompt_id_t *host_op_id, unsigned int requested_num_teams) { + printf(" Callback Submit EMI: endpoint=%d req_num_teams=%d target_data=%p " + "(0x%lx) host_op_id=%p (0x%lx)\n", + endpoint, requested_num_teams, target_data, target_data->value, + host_op_id, *host_op_id); +} + +static void on_ompt_callback_target_map_emi(ompt_data_t *target_data, + unsigned int nitems, + void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map emi callback is unimplemented"); +} diff --git a/openmp/libomptarget/test/ompt/register_both.h b/openmp/libomptarget/test/ompt/register_both.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_both.h @@ -0,0 +1,49 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + 
register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_emi.h b/openmp/libomptarget/test/ompt/register_emi.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_emi.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_emi); + 
register_ompt_callback(ompt_callback_target_submit_emi); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_emi_map.h b/openmp/libomptarget/test/ompt/register_emi_map.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_emi_map.h @@ -0,0 +1,48 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit_emi); + register_ompt_callback(ompt_callback_target_map_emi); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + 
static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_no_device_init.h b/openmp/libomptarget/test/ompt/register_no_device_init.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_no_device_init.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + // If no device init callback is registered, the other callbacks won't be + // activated. 
+ register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_non_emi.h b/openmp/libomptarget/test/ompt/register_non_emi.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_non_emi.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif 
+ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_non_emi_map.h b/openmp/libomptarget/test/ompt/register_non_emi_map.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_non_emi_map.h @@ -0,0 +1,48 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + register_ompt_callback(ompt_callback_target_map); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git 
a/openmp/libomptarget/test/ompt/register_wrong_return.h b/openmp/libomptarget/test/ompt/register_wrong_return.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_wrong_return.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 1; // failed but wrongly returning 1 + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + + return 0; // success but should return 1 according to the spec +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/veccopy.c b/openmp/libomptarget/test/ompt/veccopy.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy.c @@ -0,0 +1,78 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// 
UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newRTL +// UNSUPPORTED: x86_64-pc-linux-gnu + +#include +#include + +#include "callbacks.h" +#include "register_non_emi.h" + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i +#include + +#include "callbacks.h" +#include "register_both.h" + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i +#include +#include + +#include "callbacks.h" +#include "register_emi.h" + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i +#include +#include + +#include "callbacks.h" +#include "register_emi_map.h" + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i +#include + +#include "callbacks.h" +#include "register_non_emi_map.h" + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i +#include + +#include "callbacks.h" +#include "register_no_device_init.h" + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i +#include + +#include "callbacks.h" +#include "register_wrong_return.h" + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; ifinalize -#endif - ) { - ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); + if (ompt_enabled.enabled) { + if (ompt_start_tool_result && ompt_start_tool_result->finalize) { + ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); + } + if (libomptarget_ompt_result && libomptarget_ompt_result->finalize) { + libomptarget_ompt_result->finalize(NULL); + } } if (ompt_tool_module) @@ -868,5 +870,56 @@ FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) - return NULL; +#undef ompt_interface_fn + + return (ompt_interface_fn_t)0; +} + +static int ompt_set_frame_enter(void *addr, int flags, int state) { + return __ompt_set_frame_enter_internal(addr, flags, state); +} + +static ompt_data_t *ompt_get_task_data() { return 
__ompt_get_task_data(); }
+
+static ompt_data_t *ompt_get_target_task_data() {
+  return __ompt_get_target_task_data();
+}
+
+static ompt_interface_fn_t libomp_target_fn_lookup(const char *s) {
+#define provide_fn(fn)                                                         \
+  if (strcmp(s, #fn) == 0)                                                     \
+    return (ompt_interface_fn_t)fn;
+
+  provide_fn(ompt_set_frame_enter);
+  provide_fn(ompt_get_task_data);
+  provide_fn(ompt_get_target_task_data);
+
+#define ompt_interface_fn(fn, type, code)                                      \
+  if (strcmp(s, #fn) == 0)                                                     \
+    return (ompt_interface_fn_t)ompt_callbacks.ompt_callback(fn);
+
+  FOREACH_OMPT_DEVICE_EVENT(ompt_interface_fn)
+  FOREACH_OMPT_EMI_EVENT(ompt_interface_fn)
+  FOREACH_OMPT_NOEMI_EVENT(ompt_interface_fn)
+
+#undef ompt_interface_fn
+
+  return (ompt_interface_fn_t)0;
+}
+
+_OMP_EXTERN void libomp_ompt_connect(ompt_start_tool_result_t *result) {
+  OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Enter libomp_ompt_connect\n");
+
+  __ompt_force_initialization();
+
+  if (ompt_enabled.enabled &&
+      ompt_callbacks.ompt_callback(ompt_callback_device_initialize)) {
+    if (result && result->initialize) {
+      OMPT_VERBOSE_INIT_PRINT(
+          "libomp --> OMPT: Connecting with libomptarget\n");
+      result->initialize(libomp_target_fn_lookup, 0, NULL);
+      libomptarget_ompt_result = result;
+    }
+  }
+  OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Exit libomp_ompt_connect\n");
 }
diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h
--- a/openmp/runtime/src/ompt-internal.h
+++ b/openmp/runtime/src/ompt-internal.h
@@ -13,6 +13,8 @@
 #ifndef __OMPT_INTERNAL_H__
 #define __OMPT_INTERNAL_H__
 
+#include "kmp_platform.h"
+
 #include "ompt-event-specific.h"
 #include "omp-tools.h"
 
@@ -24,6 +26,21 @@
   ((x == fork_context_gnu) ? ompt_parallel_invoker_program                    \
                            : ompt_parallel_invoker_runtime)
 
+// Store `ptr_value` and `flags` into the `which` (enter/exit) slot of the
+// ompt_frame_t that `frame` points to. Every argument is parenthesized so
+// that non-trivial expressions expand as a unit.
+#define OMPT_FRAME_SET(frame, which, ptr_value, flags)                         \
+  {                                                                            \
+    (frame)->which##_frame.ptr = (ptr_value);                                  \
+    (frame)->which##_frame_flags = (flags);                                    \
+  }
+
+// Reset the `which` slot of `frame`: pointer and flags both become 0.
+#define OMPT_FRAME_CLEAR(frame, which) OMPT_FRAME_SET(frame, which, 0, 0)
+
+// True when the `which` slot of `frame` holds a non-null pointer.
+#define OMPT_FRAME_SET_P(frame, which) ((frame)->which##_frame.ptr != NULL)
+
 #define ompt_callback(e) e##_callback
 
 typedef struct ompt_callbacks_internal_s {
@@ -75,6 +92,7 @@
   ompt_data_t thread_data;
   ompt_data_t task_data; /* stored here from implicit barrier-begin until
                             implicit-task-end */
+  ompt_data_t target_task_data; /* returned by __ompt_get_target_task_data() */
   void *return_address; /* stored here on entry of runtime */
   ompt_state_t state;
   ompt_wait_id_t wait_id;
diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h
--- a/openmp/runtime/src/ompt-specific.h
+++ b/openmp/runtime/src/ompt-specific.h
@@ -20,7 +20,12 @@
  * forward declarations
  ****************************************************************************/
 
+void __ompt_force_initialization();
+
+int __ompt_set_frame_enter_internal(void *addr, int flags, int state);
+
 void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid);
+
 void __ompt_thread_assign_wait_id(void *variable);
 
 void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
@@ -33,6 +38,10 @@
 
 ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size);
 
+ompt_data_t *__ompt_get_task_data();
+
+ompt_data_t *__ompt_get_target_task_data();
+
 ompt_task_info_t *__ompt_get_task_info_object(int depth);
 
 int __ompt_get_parallel_info_internal(int ancestor_level,
@@ -57,12 +66,12 @@
  * macros
  ****************************************************************************/
 
-#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info))
+#define OMPT_CUR_TASK_INFO(thr) (&((thr)->th.th_current_task->ompt_task_info))
 #define OMPT_CUR_TASK_DATA(thr)                                                \
-  (&(thr->th.th_current_task->ompt_task_info.task_data))
-#define OMPT_CUR_TEAM_INFO(thr)
(&(thr->th.th_team->t.ompt_team_info))
+  (&((thr)->th.th_current_task->ompt_task_info.task_data))
+#define OMPT_CUR_TEAM_INFO(thr) (&((thr)->th.th_team->t.ompt_team_info))
 #define OMPT_CUR_TEAM_DATA(thr)                                                \
-  (&(thr->th.th_team->t.ompt_team_info.parallel_data))
+  (&((thr)->th.th_team->t.ompt_team_info.parallel_data))
 
 #define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE
 #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
@@ -79,7 +88,8 @@
   if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] &&              \
       !__kmp_threads[gtid]->th.ompt_thread_info.return_address)                \
     __kmp_threads[gtid]->th.ompt_thread_info.return_address =                  \
-        __builtin_return_address(0)*/
+      __builtin_return_address(0)*/
+
 #define OMPT_STORE_RETURN_ADDRESS(gtid)                                        \
   OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)};
 #define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid)
diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -188,6 +188,12 @@
 //******************************************************************************
 // interface operations
 //******************************************************************************
+//----------------------------------------------------------
+// initialization support
+//----------------------------------------------------------
+
+// Entry point other OMPT code uses to run __kmp_serial_initialize().
+void __ompt_force_initialization() { __kmp_serial_initialize(); }
 
 //----------------------------------------------------------
 // thread support
@@ -260,7 +266,9 @@
   lwt->ompt_team_info.master_return_address = codeptr;
   lwt->ompt_task_info.task_data.value = 0;
   lwt->ompt_task_info.frame.enter_frame = ompt_data_none;
+  lwt->ompt_task_info.frame.enter_frame_flags = 0;
   lwt->ompt_task_info.frame.exit_frame = ompt_data_none;
+  lwt->ompt_task_info.frame.exit_frame_flags = 0;
   lwt->ompt_task_info.scheduling_parent = NULL;
   lwt->heap = 0;
   lwt->parent = 0;
@@ -339,6 +347,23 @@
 // task support
 //----------------------------------------------------------
 
+// Returns the task data of the calling thread's current task, or NULL when
+// ompt_get_thread() yields no thread for the caller.
+ompt_data_t *__ompt_get_task_data() {
+  kmp_info_t *thr = ompt_get_thread();
+  ompt_data_t *task_data = thr ? OMPT_CUR_TASK_DATA(thr) : NULL;
+  return task_data;
+}
+
+// Returns the calling thread's target_task_data slot, or NULL when
+// ompt_get_thread() yields no thread for the caller. The lookup goes through
+// ompt_get_thread(), like __ompt_get_task_data() above, instead of indexing
+// __kmp_threads with an unchecked gtid.
+ompt_data_t *__ompt_get_target_task_data() {
+  kmp_info_t *thr = ompt_get_thread();
+  return thr ? &thr->th.ompt_thread_info.target_task_data : NULL;
+}
+
 int __ompt_get_task_info_internal(int ancestor_level, int *type,
                                   ompt_data_t **task_data,
                                   ompt_frame_t **task_frame,
@@ -479,6 +504,26 @@
   return 1;
 }
 
+//----------------------------------------------------------
+// target region support
+//----------------------------------------------------------
+
+// Records `addr`/`flags` as the enter frame of the calling thread's current
+// task and switches the thread's OMPT state to `state`. Returns the previous
+// state so that the caller can pass it back later to restore it.
+int __ompt_set_frame_enter_internal(void *addr, int flags, int state) {
+  int gtid = __kmp_entry_gtid();
+  kmp_info_t *thr = __kmp_threads[gtid];
+
+  ompt_frame_t *ompt_frame = &OMPT_CUR_TASK_INFO(thr)->frame;
+  OMPT_FRAME_SET(ompt_frame, enter, addr, flags);
+  int old_state = thr->th.ompt_thread_info.state;
+  // Honor the requested state rather than a hard-coded one; otherwise the
+  // value returned here could never be reinstated by a later call.
+  thr->th.ompt_thread_info.state = static_cast<ompt_state_t>(state);
+  return old_state;
+}
+
 //----------------------------------------------------------
 // team support
 //----------------------------------------------------------