diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -63,8 +63,47 @@ add_definitions(-DOMPTARGET_DEBUG) endif() +# By default, libomptarget OMPT support follows host OMPT support. +# LIBOMP_ARCH is not set here, so query it first. +libomp_get_architecture(LIBOMP_ARCH) + +# Duplicated from openmp/runtime/cmake/config-ix.cmake +if(NOT LIBOMP_HAVE___BUILTIN_FRAME_ADDRESS) + set(LIBOMP_HAVE_OMPT_SUPPORT FALSE) +else() + if( # hardware architecture supported? + ((LIBOMP_ARCH STREQUAL x86_64) OR + (LIBOMP_ARCH STREQUAL i386) OR +# (LIBOMP_ARCH STREQUAL arm) OR + (LIBOMP_ARCH STREQUAL aarch64) OR + (LIBOMP_ARCH STREQUAL aarch64_a64fx) OR + (LIBOMP_ARCH STREQUAL ppc64le) OR + (LIBOMP_ARCH STREQUAL ppc64) OR + (LIBOMP_ARCH STREQUAL riscv64)) + AND # OS supported? + ((WIN32 AND LIBOMP_HAVE_PSAPI) OR APPLE OR (NOT WIN32 AND LIBOMP_HAVE_WEAK_ATTRIBUTE))) + set(LIBOMP_HAVE_OMPT_SUPPORT TRUE) + else() + set(LIBOMP_HAVE_OMPT_SUPPORT FALSE) + endif() +endif() + +# OMPT support for libomptarget +set(OMPT_TARGET_DEFAULT FALSE) +if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (NOT WIN32)) + set (OMPT_TARGET_DEFAULT TRUE) +endif() +set(LIBOMPTARGET_OMPT_SUPPORT ${OMPT_TARGET_DEFAULT} CACHE BOOL "OMPT-target-support?") +if (LIBOMPTARGET_OMPT_SUPPORT) + add_definitions(-DOMPT_SUPPORT=1) + message(STATUS "OMPT target enabled") +else() + message(STATUS "OMPT target disabled") +endif() + set(LIBOMPTARGET_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) -include_directories(${LIBOMPTARGET_INCLUDE_DIR}) +set(LIBOMP_RUNTIME_SRC_BINARY_DIR ${CMAKE_BINARY_DIR}/openmp/runtime/src) +include_directories(${LIBOMPTARGET_INCLUDE_DIR} ${LIBOMP_RUNTIME_SRC_BINARY_DIR}) # Build target agnostic offloading library. 
set(LIBOMPTARGET_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) diff --git a/openmp/libomptarget/include/ompt-connector.h b/openmp/libomptarget/include/ompt-connector.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/ompt-connector.h @@ -0,0 +1,91 @@ +//=== ompt-connector.h - Target independent OpenMP target RTL -- C++ ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Support used by OMPT implementation to establish communication between +// various OpenMP runtime libraries: host openmp library, target-independent +// runtime library, and device-dependent runtime libraries. +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPT_CONNECTOR_H +#define _OMPT_CONNECTOR_H + +//**************************************************************************** +// global includes +//**************************************************************************** + +#include +#include + +//**************************************************************************** +// local includes +//**************************************************************************** + +#include +#include +#include + +//**************************************************************************** +// macros +//**************************************************************************** + +#define stringify(s) #s + +#define LIBOMPTARGET_GET_TARGET_OPID libomptarget_get_target_opid + +//**************************************************************************** +// type declarations +//**************************************************************************** + +typedef void (*library_ompt_connect_t)(ompt_start_tool_result_t *result); + 
+//---------------------------------------------------------------------------- +// class library_ompt_connector_t +// purpose: +// +// establish connection between openmp runtime libraries +// +// NOTE: This class is intended for use in attribute constructors. Therefore, +// it should be declared within the constructor function to ensure that +// the class is initialized before its methods are used +//---------------------------------------------------------------------------- + +class library_ompt_connector_t { +public: + void connect(ompt_start_tool_result_t *ompt_result) { + initialize(); + if (library_ompt_connect) { + library_ompt_connect(ompt_result); + } + }; + + library_ompt_connector_t(const char *library_name) { + library_connect_routine.append(library_name); + library_connect_routine.append("_ompt_connect"); + is_initialized = false; + }; + +private: + void initialize() { + if (is_initialized == false) { + DP("OMPT: library_ompt_connect = %s\n", library_connect_routine.c_str()); + void *vptr = dlsym(NULL, library_connect_routine.c_str()); + library_ompt_connect = + reinterpret_cast<library_ompt_connect_t>(vptr); + DP("OMPT: library_ompt_connect = %p\n", library_ompt_connect); + is_initialized = true; + } + }; + +private: + bool is_initialized; + library_ompt_connect_t library_ompt_connect; + std::string library_connect_routine; +}; + +#endif diff --git a/openmp/libomptarget/include/ompt_device_callbacks.h b/openmp/libomptarget/include/ompt_device_callbacks.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/ompt_device_callbacks.h @@ -0,0 +1,268 @@ +//=== ompt_device_callbacks.h - Target independent OpenMP target RTL -- C++ +//-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Interface used by both target-independent and device-dependent runtimes +// to coordinate registration and invocation of OMPT callbacks +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPT_DEVICE_CALLBACKS_H +#define _OMPT_DEVICE_CALLBACKS_H + +//**************************************************************************** +// local includes +//**************************************************************************** + +#include +#include + +#include + +//**************************************************************************** +// macros +//**************************************************************************** +#define FOREACH_OMPT_TARGET_CALLBACK(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +/***************************************************************************** + * implementation specific types + *****************************************************************************/ + +//**************************************************************************** +// types +//**************************************************************************** + +typedef uint64_t (*id_interface_t)(); + +class ompt_device { +public: + ompt_device() { atomic_store(&enabled, false); }; + bool do_initialize() { + bool old = false; + return atomic_compare_exchange_strong(&enabled, &old, true); + }; + bool do_finalize() { + bool old = true; + return atomic_compare_exchange_strong(&enabled, &old, false); + }; + +private: + std::atomic<bool> enabled; +}; + +class ompt_device_callbacks_t { +public: + virtual void ompt_callback_device_initialize(int device_num, + const char *type) { + if (ompt_callback_device_initialize_fn) { + ompt_device *device = lookup_device(device_num); + if (device->do_initialize()) { 
+ ompt_callback_device_initialize_fn( + device_num, type, (ompt_device_t *)device, lookup, documentation); + } + } + }; + + virtual void ompt_callback_device_finalize(int device_num) { + if (ompt_callback_device_finalize_fn) { + ompt_device *device = lookup_device(device_num); + if (device->do_finalize()) { + ompt_callback_device_finalize_fn(device_num); + } + } + }; + + virtual void ompt_callback_device_load(int device_num, const char *filename, + int64_t offset_in_file, + void *vma_in_file, size_t bytes, + void *host_addr, void *device_addr, + uint64_t module_id) { + if (ompt_callback_device_load_fn) { + ompt_callback_device_load_fn(device_num, filename, offset_in_file, + vma_in_file, bytes, host_addr, device_addr, + module_id); + } + }; + + virtual void ompt_callback_device_unload(int device_num, uint64_t module_id) { + if (ompt_callback_device_unload_fn) { + ompt_callback_device_unload_fn(device_num, module_id); + } + }; + + virtual void ompt_callback_target_data_op_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data, + ompt_data_t *target_data, ompt_target_data_op_t optype, void *src_addr, + int src_device_num, void *dest_addr, int dest_device_num, size_t bytes, + const void *codeptr_ra, id_interface_t id_interface, + ompt_id_t *host_op_id) { + if (ompt_callback_target_data_op_emi_fn) { + ompt_callback_target_data_op_emi_fn( + endpoint, target_task_data, target_data, host_op_id, optype, src_addr, + src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra); + } else if (endpoint == ompt_scope_begin) { + ompt_callback_target_data_op(target_data->value, optype, src_addr, + src_device_num, dest_addr, dest_device_num, + bytes, codeptr_ra, id_interface, host_op_id); + } + }; + + virtual void ompt_callback_target_data_op( + ompt_id_t target_id, ompt_target_data_op_t optype, void *src_addr, + int src_device_num, void *dest_addr, int dest_device_num, size_t bytes, + const void *codeptr_ra, id_interface_t id_interface, + ompt_id_t *host_op_id) { + 
if (ompt_callback_target_data_op_fn) { + *host_op_id = id_interface(); + ompt_callback_target_data_op_fn(target_id, *host_op_id, optype, src_addr, + src_device_num, dest_addr, + dest_device_num, bytes, codeptr_ra); + } + }; + + virtual void ompt_callback_target_emi(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_data_t *target_task_data, + ompt_data_t *target_data, + const void *codeptr_ra, + id_interface_t id_interface) { + if (ompt_callback_target_emi_fn) { + ompt_callback_target_emi_fn(kind, endpoint, device_num, task_data, + target_task_data, target_data, codeptr_ra); + } else { + ompt_callback_target(kind, endpoint, device_num, task_data, codeptr_ra, + target_data, id_interface); + } + }; + + virtual void ompt_callback_target(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + const void *codeptr_ra, + ompt_data_t *target_data, + id_interface_t id_interface) { + // if we reach this point, ompt_callback_target_emi was not + // invoked so a tool didn't provide a target id. thus, we must + // unconditionally get an id here. even if there is no + // ompt_callback_target, we need to have an id for use by other + // callbacks. + // note: + // on a scope_begin callback, id_interface will generate an id. + // on a scope_end callback, id_interface will return the existing + // id. it is safe to do the assignment again. 
+ target_data->value = id_interface(); + if (ompt_callback_target_fn) { + ompt_callback_target_fn(kind, endpoint, device_num, task_data, + target_data->value, codeptr_ra); + } + }; + + virtual void ompt_callback_target_map_emi(ompt_data_t *target_data, + unsigned int nitems, + void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + if (ompt_callback_target_map_emi_fn) { + ompt_callback_target_map_emi_fn(target_data, nitems, host_addr, + device_addr, bytes, mapping_flags, + codeptr_ra); + } else { + ompt_callback_target_map(target_data->value, nitems, host_addr, + device_addr, bytes, mapping_flags, codeptr_ra); + } + }; + + virtual void ompt_callback_target_map(ompt_id_t target_id, + unsigned int nitems, void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + if (ompt_callback_target_map_fn) { + ompt_callback_target_map_fn(target_id, nitems, host_addr, device_addr, + bytes, mapping_flags, codeptr_ra); + } + }; + + virtual void ompt_callback_target_submit_emi(ompt_scope_endpoint_t endpoint, + ompt_data_t *target_data, + unsigned int requested_num_teams, + id_interface_t id_interface, + ompt_id_t *host_op_id) { + if (ompt_callback_target_submit_emi_fn) { + ompt_callback_target_submit_emi_fn(endpoint, target_data, host_op_id, + requested_num_teams); + } else if (endpoint == ompt_scope_begin) { + return ompt_callback_target_submit( + target_data->value, requested_num_teams, id_interface, host_op_id); + } + }; + + virtual void ompt_callback_target_submit(ompt_id_t target_id, + unsigned int requested_num_teams, + id_interface_t id_interface, + ompt_id_t *host_op_id) { + if (ompt_callback_target_submit_fn) { + *host_op_id = id_interface(); + ompt_callback_target_submit_fn(target_id, *host_op_id, + requested_num_teams); + } + }; + + void init() { + enabled = false; +#define init_name(name, type, code) name##_fn = 0; + 
FOREACH_OMPT_TARGET_CALLBACK(init_name) +#undef init_name + }; + + bool is_enabled() { return enabled; } + + void prepare_devices(int number_of_devices) { resize(number_of_devices); }; + + void register_callbacks(ompt_function_lookup_t lookup) { + enabled = true; +#define ompt_bind_callback(fn, type, code) \ + fn##_fn = (fn##_t)lookup(#fn); \ + DP("OMPT: class bound %s=%p\n", #fn, ((void *)(uint64_t)fn##_fn)); + FOREACH_OMPT_TARGET_CALLBACK(ompt_bind_callback); +#undef ompt_bind_callback + }; + + ompt_interface_fn_t lookup_callback(const char *interface_function_name) { +#define ompt_dolookup(fn, type, code) \ + if (strcmp(interface_function_name, #fn) == 0) \ + return (ompt_interface_fn_t)fn##_fn; + + FOREACH_OMPT_TARGET_CALLBACK(ompt_dolookup); +#undef ompt_dolookup + + return (ompt_interface_fn_t)0; + }; + + static ompt_interface_fn_t lookup(const char *interface_function_name); + +private: + bool enabled; + +#define declare_name(name, type, code) name##_t name##_fn; + FOREACH_OMPT_TARGET_CALLBACK(declare_name) +#undef declare_name + + static void resize(int number_of_devices); + static ompt_device *lookup_device(int device_num); + static const char *documentation; +}; + +extern ompt_device_callbacks_t ompt_device_callbacks; + +#endif diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -348,7 +348,8 @@ int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); -void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); +void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, + uint64_t loop_tripcount); void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, uint64_t loop_tripcount); diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt --- 
a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -69,6 +69,7 @@ impl/system.cpp impl/msgpack.cpp src/rtl.cpp + src/ompt_callback.cpp ${LIBOMPTARGET_EXTRA_SOURCE} ) diff --git a/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp @@ -0,0 +1,132 @@ +//===------ ompt_callback.cpp - Target RTLs Implementation -------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// OMPT support for AMDGPU +// +//===----------------------------------------------------------------------===// + +//**************************************************************************** +// global includes +//**************************************************************************** + +#include +#include + +#include + +//**************************************************************************** +// debug macro needed by include files +//**************************************************************************** + +#ifndef DEBUG_PREFIX +#define DEBUG_PREFIX "Target AMDGPU RTL" +#endif + +//**************************************************************************** +// local includes +//**************************************************************************** + +#include +#include +#include + +//**************************************************************************** +// macros +//**************************************************************************** + +#define FOREACH_TARGET_FN(macro) + +#define fnptr_to_ptr(x) ((void *)(uint64_t)x) + +#define ompt_ptr_unknown ((void *)0) + 
+//**************************************************************************** +// global data +//**************************************************************************** + +ompt_device_callbacks_t ompt_device_callbacks; + +//**************************************************************************** +// private data +//**************************************************************************** + +static bool ompt_enabled = false; + +static ompt_get_target_info_t LIBOMPTARGET_GET_TARGET_OPID; + +const char *ompt_device_callbacks_t::documentation = 0; + +static ompt_device *devices = 0; + +//**************************************************************************** +// private operations +//**************************************************************************** + +void ompt_device_callbacks_t::resize(int number_of_devices) { + devices = new ompt_device[number_of_devices]; +} + +ompt_device *ompt_device_callbacks_t::lookup_device(int device_num) { + return &devices[device_num]; +} + +ompt_interface_fn_t +ompt_device_callbacks_t::lookup(const char *interface_function_name) { +#define macro(fn) \ + if (strcmp(interface_function_name, #fn) == 0) \ + return (ompt_interface_fn_t)fn; + + FOREACH_TARGET_FN(macro); + +#undef macro + + return (ompt_interface_fn_t)0; +} + +static int ompt_device_init(ompt_function_lookup_t lookup, + int initial_device_num, ompt_data_t *tool_data) { + DP("OMPT: Enter ompt_device_init\n"); + + ompt_enabled = true; + + LIBOMPTARGET_GET_TARGET_OPID = + (ompt_get_target_info_t)lookup(stringify(LIBOMPTARGET_GET_TARGET_OPID)); + + DP("OMPT: libomptarget_get_target_info = %p\n", + fnptr_to_ptr(LIBOMPTARGET_GET_TARGET_OPID)); + + ompt_device_callbacks.register_callbacks(lookup); + + DP("OMPT: Exit ompt_device_init\n"); + + return 0; +} + +static void ompt_device_fini(ompt_data_t *tool_data) { + DP("OMPT: executing amdgpu_ompt_device_fini\n"); +} + +//**************************************************************************** +// 
constructor +//**************************************************************************** + +__attribute__((constructor)) static void ompt_init(void) { + DP("OMPT: Entering ompt_init\n"); + static library_ompt_connector_t libomptarget_connector("libomptarget"); + static ompt_start_tool_result_t ompt_result; + + ompt_result.initialize = ompt_device_init; + ompt_result.finalize = ompt_device_fini; + ompt_result.tool_data.value = 0; + ; + + ompt_device_callbacks.init(); + + libomptarget_connector.connect(&ompt_result); + DP("OMPT: Exiting ompt_init\n"); +} diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -38,6 +38,16 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" +#ifdef OMPT_SUPPORT +#include +#define OMPT_IF_ENABLED(stmts) \ + if (ompt_device_callbacks.is_enabled()) { \ + stmts \ + } +#else +#define OMPT_IF_ENABLED(stmts) +#endif + // hostrpc interface, FIXME: consider moving to its own include these are // statically linked into amdgpu/plugin if present from hostrpc_services.a, // linked as --whole-archive to override the weak symbols that are used to @@ -725,6 +735,13 @@ return; } +#ifdef OMPT_SUPPORT + // TODO ompt_device_callbacks.enabled is not yet set since + // register_callbacks on the plugin instance is not yet + // called. Hence, unconditionally prepare devices. 
+ ompt_device_callbacks.prepare_devices(NumberOfDevices); +#endif + for (int i = 0; i < NumberOfDevices; i++) { uint32_t queue_size = 0; { @@ -777,10 +794,16 @@ ~RTLDeviceInfoTy() { DP("Finalizing the " GETNAME(TARGET_NAME) " DeviceInfo.\n"); + + OMPT_IF_ENABLED(for (int i = 0; i < NumberOfDevices; i++) { + ompt_device_callbacks.ompt_callback_device_finalize(i); + }) + if (!HSA.success()) { // Then none of these can have been set up and they can't be torn down return; } + // Run destructors on types that use HSA before // impl_finalize removes access to it deviceStateStore.clear(); @@ -1100,6 +1123,11 @@ DeviceInfo.GroupsPerDevice[device_id] * DeviceInfo.ThreadsPerGroup[device_id]); + OMPT_IF_ENABLED( + std::string ompt_gpu_type("AMD "); ompt_gpu_type += GetInfoName; + const char *type = ompt_gpu_type.c_str(); + ompt_device_callbacks.ompt_callback_device_initialize(device_id, type);) + return OFFLOAD_SUCCESS; } @@ -1479,7 +1507,6 @@ return NULL; } - err = env.after_loading(); if (err != HSA_STATUS_SUCCESS) { return NULL; @@ -1488,6 +1515,14 @@ DP("AMDGPU module successfully loaded!\n"); + OMPT_IF_ENABLED(const char *filename = nullptr; int64_t offset_in_file = 0; + void *vma_in_file = 0; size_t bytes = img_size; + void *host_addr = image->ImageStart; void *device_addr = 0; + uint64_t module_id = 0; // FIXME + ompt_device_callbacks.ompt_callback_device_load( + device_id, filename, offset_in_file, vma_in_file, bytes, + host_addr, device_addr, module_id);) + { // the device_State array is either large value in bss or a void* that // needs to be assigned to a pointer to an array of size device_state_bytes diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -16,6 +16,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}/api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ompt_callback.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/rtl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp ) diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -19,6 +19,16 @@ #include #include +#ifdef OMPT_SUPPORT +#include "ompt_callback.h" +#define OMPT_IF_ENABLED(stmts) \ + if (ompt_enabled) { \ + stmts \ + } +#else +#define OMPT_IF_ENABLED(stmts) +#endif + DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), @@ -431,11 +441,36 @@ } void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { - return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_alloc_begin(RTLDeviceID, HstPtr, Size, + codeptr);) + + void *tgt_ptr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); + + OMPT_IF_ENABLED( + ompt_interface.target_data_alloc_end(RTLDeviceID, HstPtr, Size, codeptr); + ompt_interface.ompt_state_clear();) + return tgt_ptr; } int32_t DeviceTy::deleteData(void *TgtPtrBegin) { - return RTL->data_delete(RTLDeviceID, TgtPtrBegin); + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_delete_begin(RTLDeviceID, TgtPtrBegin, + codeptr);) + + int32_t status = RTL->data_delete(RTLDeviceID, TgtPtrBegin); + + OMPT_IF_ENABLED( + ompt_interface.target_data_delete_end(RTLDeviceID, TgtPtrBegin, codeptr); + ompt_interface.ompt_state_clear();) + + return status; } // Submit data to device @@ -453,11 +488,24 @@ : "unknown"); } + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + 
ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_submit_begin(RTLDeviceID, TgtPtrBegin, + HstPtrBegin, Size, codeptr);) + + int32_t status; if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) - return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); + status = RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); else - return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, - AsyncInfo); + status = RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, + AsyncInfo); + + OMPT_IF_ENABLED(ompt_interface.target_data_submit_end( + RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, codeptr); + ompt_interface.ompt_state_clear();) + return status; } // Retrieve data from device @@ -474,11 +522,25 @@ : "unknown"); } + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_retrieve_begin(RTLDeviceID, HstPtrBegin, + TgtPtrBegin, Size, codeptr);) + + int32_t status; if (!RTL->data_retrieve_async || !RTL->synchronize) - return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); + status = RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); else - return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, - AsyncInfo); + status = RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, + Size, AsyncInfo); + + OMPT_IF_ENABLED(ompt_interface.target_data_retrieve_end( + RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, codeptr); + ompt_interface.ompt_state_clear();) + + return status; } // Copy data from current device to destination device directly diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -43,6 +43,7 @@ llvm_omp_get_dynamic_shared; __tgt_set_info_flag; __tgt_print_device_info; + 
libomptarget_ompt_connect; local: *; }; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "device.h" #include "omptarget.h" #include "private.h" @@ -21,6 +23,16 @@ #include #include +#ifdef OMPT_SUPPORT +#include "ompt_callback.h" +#define OMPT_IF_ENABLED(stmts) \ + if (ompt_enabled) { \ + stmts \ + } +#else +#define OMPT_IF_ENABLED(stmts) +#endif + //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t flags) { @@ -93,6 +105,13 @@ TIMESCOPE_WITH_IDENT(loc); DP("Entering data begin region for device %" PRId64 " with %d mappings\n", device_id, arg_num); + // NOTE(review): early return below appears to skip the matching OMPT end. + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_enter_begin(device_id, codeptr);) + if (checkDeviceAndCtors(device_id, loc)) { DP("Not offloading to device %" PRId64 "\n", device_id); return; @@ -118,6 +137,9 @@ if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_data_enter_end(device_id, codeptr); + ompt_interface.ompt_state_clear();) } EXTERN void __tgt_target_data_begin_nowait_mapper( @@ -182,11 +204,21 @@ #endif AsyncInfoTy AsyncInfo(Device); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_data_exit_begin(device_id, codeptr);) + int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, arg_types, arg_names, arg_mappers, AsyncInfo); if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); 
handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_data_exit_end(device_id, codeptr); + ompt_interface.ompt_state_clear();) } EXTERN void __tgt_target_data_end_nowait_mapper( @@ -226,6 +258,13 @@ void **arg_mappers) { TIMESCOPE_WITH_IDENT(loc); DP("Entering data update with %d mappings\n", arg_num); + // NOTE(review): early return below appears to skip the matching OMPT end. + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_update_begin(device_id, codeptr);) + if (checkDeviceAndCtors(device_id, loc)) { DP("Not offloading to device %" PRId64 "\n", device_id); return; @@ -242,6 +281,9 @@ if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_update_end(device_id, codeptr); + ompt_interface.ompt_state_clear();) } EXTERN void __tgt_target_data_update_nowait_mapper( @@ -301,12 +343,23 @@ DeviceTy &Device = *PM->Devices[device_id]; AsyncInfoTy AsyncInfo(Device); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + ompt_interface.target_begin(device_id, codeptr);) + int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, AsyncInfo); if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_end(device_id, codeptr); + ompt_interface.ompt_state_clear();) + assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); return OMP_TGT_SUCCESS; } @@ -375,12 +428,23 @@ DeviceTy &Device = *PM->Devices[device_id]; AsyncInfoTy AsyncInfo(Device); + + void *codeptr = nullptr; + OMPT_IF_ENABLED( + codeptr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), codeptr); + 
ompt_interface.target_begin(device_id, codeptr);) + int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, arg_names, arg_mappers, team_num, thread_limit, true /*team*/, AsyncInfo); if (rc == OFFLOAD_SUCCESS) rc = AsyncInfo.synchronize(); handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + + OMPT_IF_ENABLED(ompt_interface.target_end(device_id, codeptr); + ompt_interface.ompt_state_clear();) + assert(rc == OFFLOAD_SUCCESS && "__tgt_target_teams_mapper unexpected failure!"); return OMP_TGT_SUCCESS; @@ -424,9 +488,9 @@ MapComponentInfoTy(base, begin, size, type, name)); } -EXTERN void __kmpc_push_target_tripcount(int64_t device_id, +EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, uint64_t loop_tripcount) { - __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); + __kmpc_push_target_tripcount_mapper(loc, device_id, loop_tripcount); } EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, diff --git a/openmp/libomptarget/src/ompt_callback.h b/openmp/libomptarget/src/ompt_callback.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/ompt_callback.h @@ -0,0 +1,112 @@ +//===------- ompt_callback.h - Target independent OpenMP target RTL -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declarations for OpenMP Tool callback dispatchers +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_CALLBACK_H +#define _OMPTARGET_CALLBACK_H + +#if (__PPC64__ | __arm__) +#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) +#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_cfa +#else +#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) +#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_framepointer +#endif + +#define OMPT_FRAME_FLAGS (ompt_frame_runtime | OMPT_FRAME_POSITION_DEFAULT) + +#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) + +#include + +class OmptInterface { +public: + OmptInterface() + : _enter_frame(NULL), _codeptr_ra(NULL), _state(ompt_state_idle) {} + + void ompt_state_set(void *enter_frame, void *codeptr_ra); + + void ompt_state_clear(); + + // target op callbacks + void target_data_alloc_begin(int64_t device_id, void *TgtPtrBegin, + size_t Size, void *codeptr); + + void target_data_alloc_end(int64_t device_id, void *TgtPtrBegin, size_t Size, + void *codeptr); + + void target_data_submit_begin(int64_t device_id, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *codeptr); + + void target_data_submit_end(int64_t device_id, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *codeptr); + + void target_data_delete_begin(int64_t device_id, void *TgtPtrBegin, + void *codeptr); + + void target_data_delete_end(int64_t device_id, void *TgtPtrBegin, + void *codeptr); + + void target_data_retrieve_begin(int64_t device_id, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, + void *codeptr); + + void target_data_retrieve_end(int64_t device_id, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *codeptr); + + void target_submit_begin(unsigned int num_teams = 1); + + void 
target_submit_end(unsigned int num_teams = 1); + + // target region callbacks + void target_data_enter_begin(int64_t device_id, void *codeptr); + + void target_data_enter_end(int64_t device_id, void *codeptr); + + void target_data_exit_begin(int64_t device_id, void *codeptr); + + void target_data_exit_end(int64_t device_id, void *codeptr); + + void target_update_begin(int64_t device_id, void *codeptr); + + void target_update_end(int64_t device_id, void *codeptr); + + void target_begin(int64_t device_id, void *codeptr); + + void target_end(int64_t device_id, void *codeptr); + +private: + void ompt_state_set_helper(void *enter_frame, void *codeptr_ra, int flags, + int state); + + // begin/end target op marks + void target_operation_begin(); + + void target_operation_end(); + + // begin/end target region marks + void target_region_begin(); + + void target_region_end(); + + void target_region_announce(const char *name); + +private: + void *_enter_frame; + void *_codeptr_ra; + int _state; +}; + +extern thread_local OmptInterface ompt_interface; + +extern bool ompt_enabled; + +#endif diff --git a/openmp/libomptarget/src/ompt_callback.cpp b/openmp/libomptarget/src/ompt_callback.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/ompt_callback.cpp @@ -0,0 +1,431 @@ +//===-- ompt_callback.cpp - Target independent OpenMP target RTL -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of OMPT callback interfaces for target independent layer +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +//**************************************************************************** +// local include files +//**************************************************************************** + +#include + +#include "ompt_callback.h" +#include "private.h" + +#include +#include + +/******************************************************************************* + * macros + *******************************************************************************/ + +#define OMPT_CALLBACK_AVAILABLE(fn) (ompt_enabled && fn) +#define OMPT_CALLBACK(fn, args) ompt_device_callbacks.fn args +#define fnptr_to_ptr(x) ((void *)(uint64_t)x) + +/******************************************************************************* + * type declarations + *******************************************************************************/ + +class libomptarget_rtl_finalizer_t : std::list { +public: + libomptarget_rtl_finalizer_t() : fn(0){}; + + void register_rtl(ompt_finalize_t _fn) { + assert(fn == 0); + fn = _fn; + }; + + void finalize() { + if (fn) + fn(NULL); + fn = 0; + }; + + ompt_finalize_t fn; +}; + +typedef int (*ompt_set_frame_enter_t)(void *addr, int flags, int state); + +typedef ompt_data_t *(*ompt_get_task_data_t)(); +typedef ompt_data_t *(*ompt_get_target_task_data_t)(); + +/***************************************************************************** + * global data + *****************************************************************************/ + +bool ompt_enabled = false; + +ompt_device_callbacks_t ompt_device_callbacks; + +/***************************************************************************** + * private data + 
*****************************************************************************/ + +static ompt_set_frame_enter_t ompt_set_frame_enter_fn = 0; +static ompt_get_task_data_t ompt_get_task_data_fn = 0; +static ompt_get_target_task_data_t ompt_get_target_task_data_fn = 0; + +static libomptarget_rtl_finalizer_t libomptarget_rtl_finalizer; + +const char *ompt_device_callbacks_t::documentation = 0; + +/***************************************************************************** + * Thread local data + *****************************************************************************/ + +thread_local OmptInterface ompt_interface; + +static thread_local uint64_t ompt_target_region_id = 1; +static thread_local uint64_t ompt_target_region_opid = 1; +static thread_local ompt_data_t ompt_target_data = ompt_data_none; +static thread_local ompt_data_t *ompt_task_data = 0; +static thread_local ompt_data_t *ompt_target_task_data = 0; +static thread_local ompt_id_t host_op_id = 0; + +static std::atomic<uint64_t> unique_id_ticket(1); + +/***************************************************************************** + * OMPT callbacks + *****************************************************************************/ + +void OmptInterface::ompt_state_set_helper(void *enter_frame, void *codeptr_ra, + int flags, int state) { + _enter_frame = enter_frame; + _codeptr_ra = codeptr_ra; + if (ompt_set_frame_enter_fn) { + _state = ompt_set_frame_enter_fn(_enter_frame, flags, state); + } + + return; +} + +void OmptInterface::ompt_state_set(void *enter_frame, void *codeptr_ra) { + ompt_state_set_helper(enter_frame, codeptr_ra, OMPT_FRAME_FLAGS, + ompt_state_work_parallel); +} + +void OmptInterface::ompt_state_clear(void) { + ompt_state_set_helper(0, 0, 0, _state); +} + +/***************************************************************************** + * OMPT private operations + *****************************************************************************/ + +static uint64_t id_create() { return
unique_id_ticket.fetch_add(1); } + +static uint64_t opid_create() { + host_op_id = id_create(); + return host_op_id; +} + +static uint64_t opid_get() { return host_op_id; } + +static uint64_t regionid_create() { + ompt_target_data.value = id_create(); + return ompt_target_data.value; +} + +static uint64_t regionid_get() { return ompt_target_data.value; } + +void OmptInterface::target_region_begin() { + // set up task region state; a lookup may be unbound or yield no task + // data, in which case the corresponding pointer stays null + ompt_task_data = ompt_get_task_data_fn ? ompt_get_task_data_fn() : 0; + ompt_target_task_data = + ompt_get_target_task_data_fn ? ompt_get_target_task_data_fn() : 0; + + // NOTE(review): this also resets the host task's data word -- confirm that + // tools are not expected to keep a value there across a target region. + if (ompt_task_data) + *ompt_task_data = ompt_data_none; + if (ompt_target_task_data) + *ompt_target_task_data = ompt_data_none; + ompt_target_data = ompt_data_none; +} + +void OmptInterface::target_region_announce(const char *name) { + DP("in OmptInterface::target_region_%s target_id=%lu\n", name, + ompt_target_data.value); +} + +void OmptInterface::target_region_end() { + ompt_task_data = 0; + ompt_target_task_data = 0; + ompt_target_data = ompt_data_none; +} + +// Debug trace emitted after a target-operation begin callback; reports the +// operation id, not the region id. +void OmptInterface::target_operation_begin() { + DP("in OmptInterface::target_operation_begin (ompt_target_region_opid = " + "%lu)\n", + ompt_target_region_opid); +} + +// Debug trace emitted after a target-operation end callback. +void OmptInterface::target_operation_end() { + DP("in OmptInterface::target_operation_end (ompt_target_region_opid = " + "%lu)\n", + ompt_target_region_opid); +} + +/***************************************************************************** + * OMPT public operations + *****************************************************************************/ + +// FIXME: optional implementation of target map?
+ +void OmptInterface::target_data_alloc_begin(int64_t device_id, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_alloc, hst_ptr_begin, device_id, NULL, 0, size, codeptr, + opid_create, &ompt_target_region_opid); + target_operation_begin(); +} + +void OmptInterface::target_data_alloc_end(int64_t device_id, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_alloc, hst_ptr_begin, device_id, NULL, 0, size, codeptr, + opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_data_submit_begin(int64_t device_id, + void *tgt_ptr_begin, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_to_device, hst_ptr_begin, 0, tgt_ptr_begin, + device_id, size, codeptr, opid_create, &ompt_target_region_opid); + target_operation_begin(); +} + +void OmptInterface::target_data_submit_end(int64_t device_id, + void *tgt_ptr_begin, + void *hst_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_to_device, hst_ptr_begin, 0, tgt_ptr_begin, + device_id, size, codeptr, opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_data_delete_begin(int64_t device_id, + void *tgt_ptr_begin, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_delete, tgt_ptr_begin, device_id, NULL, 0, 0, codeptr, + opid_create, &ompt_target_region_opid); + 
target_operation_begin(); +} + +void OmptInterface::target_data_delete_end(int64_t device_id, + void *tgt_ptr_begin, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_delete, tgt_ptr_begin, device_id, NULL, 0, 0, codeptr, + opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_data_retrieve_begin(int64_t device_id, + void *hst_ptr_begin, + void *tgt_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_from_device, tgt_ptr_begin, device_id, + hst_ptr_begin, 0, size, codeptr, opid_create, &ompt_target_region_opid); + target_operation_begin(); +} + +void OmptInterface::target_data_retrieve_end(int64_t device_id, + void *hst_ptr_begin, + void *tgt_ptr_begin, size_t size, + void *codeptr) { + ompt_device_callbacks.ompt_callback_target_data_op_emi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_from_device, tgt_ptr_begin, device_id, + hst_ptr_begin, 0, size, codeptr, opid_get, &ompt_target_region_opid); + target_operation_end(); +} + +void OmptInterface::target_submit_begin(unsigned int num_teams) { + ompt_device_callbacks.ompt_callback_target_submit_emi( + ompt_scope_begin, &ompt_target_data, num_teams, opid_create, + &ompt_target_region_opid); +} + +void OmptInterface::target_submit_end(unsigned int num_teams) { + ompt_device_callbacks.ompt_callback_target_submit_emi( + ompt_scope_end, &ompt_target_data, num_teams, opid_get, + &ompt_target_region_opid); +} + +void OmptInterface::target_data_enter_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_enter_data, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); 
+} + +void OmptInterface::target_data_enter_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_enter_data, ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +void OmptInterface::target_data_exit_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_exit_data, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); + target_region_announce("begin"); +} + +void OmptInterface::target_data_exit_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_exit_data, ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +void OmptInterface::target_update_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_update, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); + target_region_announce("begin"); +} + +void OmptInterface::target_update_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target_update, ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +void OmptInterface::target_begin(int64_t device_id, void *codeptr) { + target_region_begin(); + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target, ompt_scope_begin, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_create); + target_region_announce("begin"); +} + +void OmptInterface::target_end(int64_t device_id, void *codeptr) { + ompt_device_callbacks.ompt_callback_target_emi( + ompt_target, 
ompt_scope_end, device_id, ompt_task_data, + ompt_target_task_data, &ompt_target_data, codeptr, regionid_get); + target_region_end(); +} + +/***************************************************************************** + * OMPT interface operations + *****************************************************************************/ + +static void LIBOMPTARGET_GET_TARGET_OPID(uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id) { + *host_op_id = ompt_target_region_opid; +} + +static int libomptarget_ompt_initialize(ompt_function_lookup_t lookup, + int initial_device_num, + ompt_data_t *tool_data) { + DP("enter libomptarget_ompt_initialize!\n"); + + ompt_enabled = true; + +#define ompt_bind_name(fn) \ + fn##_fn = (fn##_t)lookup(#fn); \ + DP("%s=%p\n", #fn, fnptr_to_ptr(fn##_fn)); + + ompt_bind_name(ompt_set_frame_enter); + ompt_bind_name(ompt_get_task_data); + ompt_bind_name(ompt_get_target_task_data); + +#undef ompt_bind_name + + ompt_device_callbacks.register_callbacks(lookup); + + DP("exit libomptarget_ompt_initialize!\n"); + + return 0; +} + +static void libomptarget_ompt_finalize(ompt_data_t *data) { + DP("enter libomptarget_ompt_finalize!\n"); + + libomptarget_rtl_finalizer.finalize(); + + ompt_enabled = false; + + DP("exit libomptarget_ompt_finalize!\n"); +} + +// Today, this is not called from libomptarget +ompt_device *ompt_device_callbacks_t::lookup_device(int device_num) { + assert(0 && "Lookup device should be invoked in the plugin"); + return nullptr; +} + +ompt_interface_fn_t +ompt_device_callbacks_t::lookup(const char *interface_function_name) { + if (strcmp(interface_function_name, + stringify(LIBOMPTARGET_GET_TARGET_OPID)) == 0) + return (ompt_interface_fn_t)LIBOMPTARGET_GET_TARGET_OPID; + + return ompt_device_callbacks.lookup_callback(interface_function_name); +} + +typedef void (*libomp_libomptarget_ompt_init_t)(ompt_start_tool_result_t *); + +__attribute__((weak)) void +libomp_libomptarget_ompt_init(ompt_start_tool_result_t *result) 
{ + // no initialization of OMPT for libomptarget unless + // libomp implements this function + DP("in dummy libomp_libomptarget_ompt_init\n"); +} + +/***************************************************************************** + * constructor + *****************************************************************************/ + +__attribute__((constructor(102))) static void ompt_init(void) { + static library_ompt_connector_t libomp_connector("libomp"); + static ompt_start_tool_result_t ompt_result; + + ompt_result.initialize = libomptarget_ompt_initialize; + ompt_result.finalize = libomptarget_ompt_finalize; + ompt_result.tool_data.value = 0; + + ompt_device_callbacks.init(); + + libomp_connector.connect(&ompt_result); + DP("OMPT: Exit ompt_init\n"); +} + +extern "C" { + +void libomptarget_ompt_connect(ompt_start_tool_result_t *result) { + DP("OMPT: Enter libomptarget_ompt_connect\n"); + if (ompt_enabled && result) { + libomptarget_rtl_finalizer.register_rtl(result->finalize); + result->initialize(ompt_device_callbacks_t::lookup, 0, NULL); + } + DP("OMPT: Leave libomptarget_ompt_connect\n"); +} +} diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -19,6 +19,16 @@ #include #include +#ifdef OMPT_SUPPORT +#include "ompt_callback.h" +#define OMPT_IF_ENABLED(stmts) \ + if (ompt_enabled) { \ + stmts \ + } +#else +#define OMPT_IF_ENABLED(stmts) +#endif + int AsyncInfoTy::synchronize() { int Result = OFFLOAD_SUCCESS; if (AsyncInfo.Queue) { @@ -1455,6 +1465,10 @@ DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index); + OMPT_IF_ENABLED(ompt_interface.ompt_state_set(OMPT_GET_FRAME_ADDRESS(0), + OMPT_GET_RETURN_ADDRESS(0)); + ompt_interface.target_submit_begin(TeamNum);) + { TIMESCOPE_WITH_NAME_AND_IDENT( IsTeamConstruct ? 
"runTargetTeamRegion" : "runTargetRegion", loc); @@ -1472,6 +1486,9 @@ return OFFLOAD_FAIL; } + OMPT_IF_ENABLED(ompt_interface.target_submit_end(TeamNum); + ompt_interface.ompt_state_clear();) + if (ArgNum) { // Transfer data back and deallocate target memory for (first-)private // variables diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt --- a/openmp/runtime/src/exports_so.txt +++ b/openmp/runtime/src/exports_so.txt @@ -25,7 +25,9 @@ # # OMPT API # + ompt_control; # OMPT control interface ompt_start_tool; # OMPT start interface + libomp_ompt_connect; # OMPT libomptarget interface ompc_*; # omp.h renames some standard functions to ompc_*. kmp_*; # Intel extensions. diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var --- a/openmp/runtime/src/include/omp-tools.h.var +++ b/openmp/runtime/src/include/omp-tools.h.var @@ -108,7 +108,7 @@ macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ -#define FOREACH_OMPT_EVENT(macro) \ +#define FOREACH_OMPT_HOST_EVENT(macro) \ \ /*--- Mandatory Events ---*/ \ macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ @@ -121,17 +121,9 @@ macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ \ - macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ - macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ - macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ \ macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ \ - macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ - macro (ompt_callback_device_finalize, 
ompt_callback_device_finalize_t, 13) /* device finalize */ \ - \ - macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ - macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ \ /* Optional Events */ \ macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \ @@ -145,7 +137,6 @@ \ macro (ompt_callback_masked, ompt_callback_masked_t, 21) /* task at masked begin or end */ \ \ - macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ \ macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ \ @@ -164,11 +155,48 @@ macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \ \ macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ \ + macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */ + + +#define FOREACH_OMPT_DEVICE_EVENT(macro) \ + macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ + macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ + \ + macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ + macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ + + +#define FOREACH_OMPT_NOEMI_EVENT(macro) \ + macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ + macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ + macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ + macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ + + +#define FOREACH_OMPT_EMI_EVENT(macro) \ macro (ompt_callback_target_emi, ompt_callback_target_emi_t, 33) /* target */ \ macro (ompt_callback_target_data_op_emi,ompt_callback_target_data_op_emi_t,34) 
/* target data op */ \ macro (ompt_callback_target_submit_emi, ompt_callback_target_submit_emi_t, 35) /* target submit */ \ macro (ompt_callback_target_map_emi, ompt_callback_target_map_emi_t, 36) /* target map */ \ - macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */ + +#define FOREACH_OMPT_50_TARGET_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) + +#define FOREACH_OMPT_51_TARGET_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +#define FOREACH_OMPT_EVENT(macro) \ + FOREACH_OMPT_HOST_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +#define FOREACH_OMPT_51_EVENT(macro) \ + FOREACH_OMPT_HOST_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) /***************************************************************************** * implementation specific types diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3650,8 +3650,12 @@ * to deal with the call. */ enum fork_context_e { - fork_context_gnu, /**< Called from GNU generated code, so must not invoke the - microtask internally. */ + fork_context_gnu_task_program, /**< Called from GNU generated code, so must + not invoke the microtask internally. */ + fork_context_gnu_task_library, /**< Called from GNU generated code, so must + not invoke the microtask internally. */ + fork_context_gnu, /**vi3-merge: This one should replace the previous two I + suppose*/ fork_context_intel, /**< Called from Intel generated code. 
*/ fork_context_last }; diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -109,6 +109,7 @@ static void *ompt_tool_module = NULL; #define OMPT_DLCLOSE(Lib) dlclose(Lib) #endif +static ompt_start_tool_result_t *libomptarget_ompt_result = NULL; /***************************************************************************** * forward declarations @@ -494,13 +495,18 @@ ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( ompt_thread_initial, __ompt_get_thread_data_internal()); } - ompt_data_t *task_data; - ompt_data_t *parallel_data; - __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, - NULL); + ompt_data_t *task_data = nullptr; + ompt_frame_t *task_frame = nullptr; + ompt_data_t *parallel_data = nullptr; + __ompt_get_task_info_internal(0, NULL, &task_data, &task_frame, + &parallel_data, NULL); if (ompt_enabled.ompt_callback_implicit_task) { + OMPT_FRAME_SET(task_frame, exit, OMPT_GET_FRAME_ADDRESS(0), + (ompt_frame_runtime | OMPT_FRAME_POSITION_DEFAULT)); ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial); + ompt_scope_begin, parallel_data /*parallel data*/, task_data, + 1 /*team size*/, 1 /*initial task: index=1*/, ompt_task_initial); + OMPT_FRAME_CLEAR(task_frame, exit); } ompt_set_thread_state(root_thread, ompt_state_work_serial); @@ -513,6 +519,9 @@ && ompt_start_tool_result && ompt_start_tool_result->finalize #endif ) { + if (libomptarget_ompt_result) { + libomptarget_ompt_result->finalize(NULL); + } ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); } @@ -868,5 +877,56 @@ FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) - return NULL; +#undef ompt_interface_fn + + return (ompt_interface_fn_t)0; +} + +static int ompt_set_frame_enter(void *addr, int flags, int state) { + return __ompt_set_frame_enter_internal(addr, flags, state); +}
+ +static ompt_data_t *ompt_get_task_data() { return __ompt_get_task_data(); } + +static ompt_data_t *ompt_get_target_task_data() { + return __ompt_get_target_task_data(); +} + +static ompt_interface_fn_t libomp_target_fn_lookup(const char *s) { +#define provide_fn(fn) \ + if (strcmp(s, #fn) == 0) \ + return (ompt_interface_fn_t)fn; + + provide_fn(ompt_set_frame_enter); + provide_fn(ompt_get_task_data); + provide_fn(ompt_get_target_task_data); + +#define ompt_interface_fn(fn, type, code) \ + if (strcmp(s, #fn) == 0) \ + return (ompt_interface_fn_t)ompt_callbacks.ompt_callback(fn); + + FOREACH_OMPT_DEVICE_EVENT(ompt_interface_fn) + FOREACH_OMPT_EMI_EVENT(ompt_interface_fn) + FOREACH_OMPT_NOEMI_EVENT(ompt_interface_fn) + +#undef ompt_interface_fn + + return (ompt_interface_fn_t)0; +} + +_OMP_EXTERN void libomp_ompt_connect(ompt_start_tool_result_t *result) { + OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Enter libomp_ompt_connect\n"); + + __ompt_force_initialization(); + + if (ompt_enabled.enabled && + ompt_callbacks.ompt_callback(ompt_callback_device_initialize)) { + if (result) { + OMPT_VERBOSE_INIT_PRINT( + "libomp --> OMPT: Connecting with libomptarget\n"); + result->initialize(libomp_target_fn_lookup, 0, NULL); + libomptarget_ompt_result = result; + } + } + OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Exit libomp_ompt_connect\n"); } diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -13,6 +13,8 @@ #ifndef __OMPT_INTERNAL_H__ #define __OMPT_INTERNAL_H__ +#include "kmp_platform.h" + #include "ompt-event-specific.h" #include "omp-tools.h" @@ -21,8 +23,18 @@ #define _OMP_EXTERN extern "C" #define OMPT_INVOKER(x) \ - ((x == fork_context_gnu) ? ompt_parallel_invoker_program \ - : ompt_parallel_invoker_runtime) + ((x == fork_context_gnu_task_program) ? 
ompt_parallel_invoker_program \ + : ompt_parallel_invoker_runtime) + +#define OMPT_FRAME_SET(frame, which, ptr_value, flags) \ + { \ + frame->which##_frame.ptr = ptr_value; \ + frame->which##_frame_flags = flags; \ + } + +#define OMPT_FRAME_CLEAR(frame, which) OMPT_FRAME_SET(frame, which, 0, 0) + +#define OMPT_FRAME_SET_P(frame, which) (frame->which##_frame.ptr != NULL) #define ompt_callback(e) e##_callback @@ -104,7 +116,23 @@ void ompt_fini(void); #define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) + +#if (KMP_ARCH_PPC64 | KMP_ARCH_ARM) +// On Power and ARM, the frame pointer (__builtin_frame_address(0)) +// points to the top of the stack frame. For gcc4, this is not a useful +// value after returning from GOMP_parallel_start to call an outlined +// task in the master thread. To support gcc4 in a uniform fashion, +// always use the canonical frame address (known as CFA, which is the +// top of the caller's stack), which is available as +// __builtin_frame_address(1), for the OMPT frame pointer for a frame. 
+#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level + 1) +#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_cfa +#define OMPT_FRAME_POSITION_GCC4_TASK ompt_frame_cfa +#else #define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) +#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_framepointer +#define OMPT_FRAME_POSITION_GCC4_TASK ompt_frame_stackaddress +#endif int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg); diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -20,7 +20,12 @@ * forward declarations ****************************************************************************/ +void __ompt_force_initialization(); + +int __ompt_set_frame_enter_internal(void *addr, int flags, int state); + void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid); + void __ompt_thread_assign_wait_id(void *variable); void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, @@ -33,6 +38,10 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); +ompt_data_t *__ompt_get_task_data(); + +ompt_data_t *__ompt_get_target_task_data(); + ompt_task_info_t *__ompt_get_task_info_object(int depth); int __ompt_get_parallel_info_internal(int ancestor_level, @@ -57,12 +66,12 @@ * macros ****************************************************************************/ -#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info)) +#define OMPT_CUR_TASK_INFO(thr) (&((thr)->th.th_current_task->ompt_task_info)) #define OMPT_CUR_TASK_DATA(thr) \ - (&(thr->th.th_current_task->ompt_task_info.task_data)) -#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info)) + (&((thr)->th.th_current_task->ompt_task_info.task_data)) +#define OMPT_CUR_TEAM_INFO(thr) (&((thr)->th.th_team->t.ompt_team_info)) #define OMPT_CUR_TEAM_DATA(thr) \ - (&(thr->th.th_team->t.ompt_team_info.parallel_data)) + 
(&((thr)->th.th_team->t.ompt_team_info.parallel_data)) #define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI @@ -75,20 +84,27 @@ return return_address; } -/*#define OMPT_STORE_RETURN_ADDRESS(gtid) \ +#define OMPT_STORE_RETURN_ADDRESS_GCC4(gtid) \ if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ - __builtin_return_address(0)*/ + __builtin_return_address(0) + #define OMPT_STORE_RETURN_ADDRESS(gtid) \ OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)}; + #define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) + #define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \ ((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ __kmp_threads[gtid]->th.ompt_thread_info.return_address) \ ? __ompt_load_return_address(gtid) \ : __builtin_return_address(0)) +#define OMPT_CLEAR_RETURN_ADDRESS(gtid) \ + if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid]) \ + __kmp_threads[gtid]->th.ompt_thread_info.return_address = 0 + //****************************************************************************** // inline functions //****************************************************************************** diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp --- a/openmp/runtime/src/ompt-specific.cpp +++ b/openmp/runtime/src/ompt-specific.cpp @@ -188,6 +188,11 @@ //****************************************************************************** // interface operations //****************************************************************************** +//---------------------------------------------------------- +// initialization support +//---------------------------------------------------------- + +void __ompt_force_initialization() { __kmp_serial_initialize(); } //---------------------------------------------------------- // 
thread support @@ -260,7 +265,9 @@ lwt->ompt_team_info.master_return_address = codeptr; lwt->ompt_task_info.task_data.value = 0; lwt->ompt_task_info.frame.enter_frame = ompt_data_none; + lwt->ompt_task_info.frame.enter_frame_flags = 0; lwt->ompt_task_info.frame.exit_frame = ompt_data_none; + lwt->ompt_task_info.frame.exit_frame_flags = 0; lwt->ompt_task_info.scheduling_parent = NULL; lwt->heap = 0; lwt->parent = 0; @@ -339,6 +346,20 @@ // task support //---------------------------------------------------------- +ompt_data_t *__ompt_get_task_data() { + kmp_info_t *thr = ompt_get_thread(); + ompt_data_t *task_data = thr ? OMPT_CUR_TASK_DATA(thr) : NULL; + return task_data; +} + +ompt_data_t *__ompt_get_target_task_data() { + // no implementation of target tasks yet; pass a thread-local + // non-null argument that will meet minimal expectations of + // an OMPT tool + static thread_local ompt_data_t target_task_data; + return &target_task_data; +} + int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, ompt_frame_t **task_frame, @@ -479,6 +500,26 @@ return 1; } +//---------------------------------------------------------- +// target region support +//---------------------------------------------------------- + +// Record `addr`/`flags` as the enter frame of the calling thread's current +// task, switch the thread to `state`, and return the state it was in before +// so that the caller can restore it later. +int __ompt_set_frame_enter_internal(void *addr, int flags, int state) { + int gtid = __kmp_entry_gtid(); + kmp_info_t *thr = __kmp_threads[gtid]; + + ompt_frame_t *ompt_frame = &OMPT_CUR_TASK_INFO(thr)->frame; + OMPT_FRAME_SET(ompt_frame, enter, addr, flags); + int old_state = thr->th.ompt_thread_info.state; + // Apply the requested state rather than a fixed one: callers pass the + // previously returned state back in to restore it. 
+ thr->th.ompt_thread_info.state = (ompt_state_t)state; + return old_state; +} + //---------------------------------------------------------- // team support //----------------------------------------------------------