diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -69,8 +69,24 @@ add_definitions(-DOMPTARGET_DEBUG) endif() +# OMPT support for libomptarget +set(OMPT_TARGET_DEFAULT FALSE) +if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (NOT WIN32)) + set (OMPT_TARGET_DEFAULT TRUE) +endif() +set(LIBOMPTARGET_OMPT_SUPPORT ${OMPT_TARGET_DEFAULT} CACHE BOOL "OMPT-target-support?") +if (LIBOMPTARGET_OMPT_SUPPORT) + add_definitions(-DOMPT_SUPPORT=1) + message(STATUS "OMPT target enabled") +else() + message(STATUS "OMPT target disabled") +endif() + +pythonize_bool(LIBOMPTARGET_OMPT_SUPPORT) + set(LIBOMPTARGET_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) -include_directories(${LIBOMPTARGET_INCLUDE_DIR}) +set(LIBOMP_RUNTIME_SRC_BINARY_DIR ${CMAKE_BINARY_DIR}/openmp/runtime/src) +include_directories(${LIBOMPTARGET_INCLUDE_DIR} ${LIBOMP_RUNTIME_SRC_BINARY_DIR}) # Build target agnostic offloading library. set(LIBOMPTARGET_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) diff --git a/openmp/libomptarget/include/ompt-connector.h b/openmp/libomptarget/include/ompt-connector.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/ompt-connector.h @@ -0,0 +1,107 @@ +//=== ompt-connector.h - Target independent OpenMP target RTL -- C++ ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Support used by OMPT implementation to establish communication between +// various OpenMP runtime libraries: host openmp library, target-independent +// runtime library, and device-dependent runtime libraries. +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPT_CONNECTOR_H +#define _OMPT_CONNECTOR_H + +//**************************************************************************** +// global includes +//**************************************************************************** + +#include +#include + +//**************************************************************************** +// local includes +//**************************************************************************** + +#include +#include +#include + +//**************************************************************************** +// macros +//**************************************************************************** + +#define LIBOMPTARGET_STRINGIFY(s) #s + +#define LIBOMPTARGET_GET_TARGET_OPID libomptarget_get_target_opid + +//**************************************************************************** +// type declarations +//**************************************************************************** + +/// Type for the function to be invoked for connecting two libraries. +typedef void (*OmptConnectRtnTy)(ompt_start_tool_result_t *result); + +/// Establish connection between openmp runtime libraries +/// +/// This class is used to communicate between an OMPT implementation in +/// libomptarget and libomp. It is also used to communicate between an +/// OMPT implementation in a device-specific plugin and +/// libomptarget. The decision whether OMPT is enabled or not needs to +/// be made when the library is loaded before any functions in the +/// library are invoked. For that reason, an instance of this class is +/// intended to be defined in the constructor for libomptarget or a +/// plugin so that the decision about whether OMPT is supposed to be +/// enabled is known before any interface function in the library is +/// invoked. +class OmptLibraryConnectorTy { +public: + /// Use \p LibName as the prefix of the global function used for connecting + /// two libraries, the source indicated by \p LibName and the destination + /// being the one that creates this object. + OmptLibraryConnectorTy(const char *LibName) { + LibConnRtn.append(LibName); + LibConnRtn.append("_connect"); + IsInitialized = false; + } + OmptLibraryConnectorTy() = delete; + /// Use \p OmptResult init to connect the two libraries denoted by this + /// object. The init function of \p OmptResult will be used during connection + /// and the fini function of \p OmptResult will be used during teardown. + void connect(ompt_start_tool_result_t *OmptResult) { + initialize(); + if (!LibConnHandle) + return; + // Call the function provided by the source library for connect + LibConnHandle(OmptResult); + } + +private: + void initialize() { + if (IsInitialized) + return; + + DP("OMPT: Library connection routine = %s\n", LibConnRtn.c_str()); + + void *VPtr = dlsym(NULL, LibConnRtn.c_str()); + // If dlsym fails, the handle will be null. connect() checks + // for this condition + LibConnHandle = + reinterpret_cast(reinterpret_cast(VPtr)); + DP("OMPT: Library connection handle = %p\n", LibConnHandle); + IsInitialized = true; + } + +private: + /// Ensure initialization occurs only once + bool IsInitialized; + /// Handle of connect routine provided by source library + OmptConnectRtnTy LibConnHandle; + /// Name of connect routine provided by source library + std::string LibConnRtn; +}; + +#endif diff --git a/openmp/libomptarget/include/ompt_device_callbacks.h b/openmp/libomptarget/include/ompt_device_callbacks.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/ompt_device_callbacks.h @@ -0,0 +1,267 @@ +//=== ompt_device_callbacks.h - Target independent OpenMP target RTL -- C++ +//---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Interface used by both target-independent and device-dependent runtimes +// to coordinate registration and invocation of OMPT callbacks +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPT_DEVICE_CALLBACKS_H +#define _OMPT_DEVICE_CALLBACKS_H + +//**************************************************************************** +// local includes +//**************************************************************************** + +#include +#include + +#include + +#define FOREACH_OMPT_TARGET_CALLBACK(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +typedef uint64_t (*IdInterfaceTy)(); + +/// Internal representation for OMPT device +class OmptDeviceTy { +public: + OmptDeviceTy() { Enabled.store(false); } + bool initialize() { + bool Old = false; + return Enabled.compare_exchange_strong(Old, true); + } + bool finalize() { + bool Old = true; + return Enabled.compare_exchange_strong(Old, false); + } + +private: + std::atomic Enabled; +}; + +/// Internal representation for OMPT device callback functions. An EMI callback +/// passes a pointer to a correlation-id (HostOpId) to be set by the tool. For a +/// non-EMI callback, the correlation-id (HostOpId) is set by the runtime and +/// provided to the tool. +class OmptDeviceCallbacksTy { +public: + /// Invoked when a device is initialized + void OmptCallbackDeviceInitialize(int DeviceNum, const char *Type) { + if (ompt_callback_device_initialize_fn) { + OmptDeviceTy *Device = lookupDevice(DeviceNum); + if (Device->initialize()) { + ompt_callback_device_initialize_fn( + DeviceNum, Type, (ompt_device_t *)Device, doLookup, Documentation); + } + } + } + + /// Invoked when a device is finalized + void OmptCallbackDeviceFinalize(int DeviceNum) { + if (ompt_callback_device_finalize_fn) { + OmptDeviceTy *Device = lookupDevice(DeviceNum); + if (Device->finalize()) { + ompt_callback_device_finalize_fn(DeviceNum); + } + } + } + + /// Invoked when a device image is loaded + void OmptCallbackDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, void *VmaInFile, + size_t Bytes, void *HostAddr, void *DeviceAddr, + uint64_t ModuleId) { + if (ompt_callback_device_load_fn) { + ompt_callback_device_load_fn(DeviceNum, Filename, OffsetInFile, VmaInFile, + Bytes, HostAddr, DeviceAddr, ModuleId); + } + } + + /// Invoked when a device image is unloaded + void OmptCallbackDeviceUnload(int DeviceNum, uint64_t ModuleId) { + if (ompt_callback_device_unload_fn) { + ompt_callback_device_unload_fn(DeviceNum, ModuleId); + } + } + + /// Invoked when a data transfer is initiated + void OmptCallbackTargetDataOpEmi( + ompt_scope_endpoint_t EndPoint, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DestAddr, int DestDeviceNum, size_t Bytes, + const void *CodePtrRA, IdInterfaceTy IdInterface, ompt_id_t *HostOpId) { + if (ompt_callback_target_data_op_emi_fn) { + ompt_callback_target_data_op_emi_fn( + EndPoint, TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, + SrcDeviceNum, DestAddr, DestDeviceNum, Bytes, CodePtrRA); + } else if (EndPoint == ompt_scope_begin) { + OmptCallbackTargetDataOp(TargetData->value, OpType, SrcAddr, SrcDeviceNum, + DestAddr, DestDeviceNum, Bytes, CodePtrRA, + IdInterface, HostOpId); + } + } + + /// Invoked when a data transfer is initiated + void OmptCallbackTargetDataOp( + ompt_id_t TargetId, ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DestAddr, int DestDeviceNum, size_t Bytes, + const void *CodePtrRA, IdInterfaceTy IdInterface, ompt_id_t *HostOpId) { + if (ompt_callback_target_data_op_fn) { + *HostOpId = IdInterface(); + ompt_callback_target_data_op_fn(TargetId, *HostOpId, OpType, SrcAddr, + SrcDeviceNum, DestAddr, DestDeviceNum, + Bytes, CodePtrRA); + } + } + + /// Invoked when a target region is executed + void OmptCallbackTargetEmi(ompt_target_t Kind, ompt_scope_endpoint_t EndPoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, const void *CodePtrRA, + IdInterfaceTy IdInterface) { + if (ompt_callback_target_emi_fn) { + ompt_callback_target_emi_fn(Kind, EndPoint, DeviceNum, TaskData, + TargetTaskData, TargetData, CodePtrRA); + } else { + OmptCallbackTarget(Kind, EndPoint, DeviceNum, TaskData, CodePtrRA, + TargetData, IdInterface); + } + } + + /// Invoked when a target region is executed + void OmptCallbackTarget(ompt_target_t Kind, ompt_scope_endpoint_t EndPoint, + int DeviceNum, ompt_data_t *TaskData, + const void *CodePtrRA, ompt_data_t *TargetData, + IdInterfaceTy IdInterface) { + // if we reach this point, ompt_callback_target_emi was not + // invoked so a tool didn't provide a target id. thus, we must + // unconditionally get an id here. even if there is no + // OMPT callback target, we need to have an id for use by other + // callbacks. + // note: + // on a scope_begin callback, IdInterface() will generate an id. + // on a scope_end callback, IdInterface() will return the existing + // id. it is safe to do the assignment again. + TargetData->value = IdInterface(); + if (ompt_callback_target_fn) { + ompt_callback_target_fn(Kind, EndPoint, DeviceNum, TaskData, + TargetData->value, CodePtrRA); + } + } + + /// Invoked when a target map clause is executed + void OmptCallbackTargetMapEmi(ompt_data_t *TargetData, unsigned int NItems, + void **HostAddr, void **DeviceAddr, + size_t *Bytes, unsigned int *MappingFlags, + const void *CodePtrRA) { + if (ompt_callback_target_map_emi_fn) { + ompt_callback_target_map_emi_fn(TargetData, NItems, HostAddr, DeviceAddr, + Bytes, MappingFlags, CodePtrRA); + } else { + OmptCallbackTargetMap(TargetData->value, NItems, HostAddr, DeviceAddr, + Bytes, MappingFlags, CodePtrRA); + } + } + + /// Invoked when a target map clause is executed + void OmptCallbackTargetMap(ompt_id_t TargetId, unsigned int NItems, + void **HostAddr, void **DeviceAddr, size_t *Bytes, + unsigned int *MappingFlags, + const void *CodePtrRA) { + if (ompt_callback_target_map_fn) { + ompt_callback_target_map_fn(TargetId, NItems, HostAddr, DeviceAddr, Bytes, + MappingFlags, CodePtrRA); + } + } + + /// Invoked when a target submit is executed + void OmptCallbackTargetSubmitEmi(ompt_scope_endpoint_t EndPoint, + ompt_data_t *TargetData, + unsigned int RequestedNumTeams, + IdInterfaceTy IdInterface, + ompt_id_t *HostOpId) { + if (ompt_callback_target_submit_emi_fn) { + ompt_callback_target_submit_emi_fn(EndPoint, TargetData, HostOpId, + RequestedNumTeams); + } else if (EndPoint == ompt_scope_begin) { + return OmptCallbackTargetSubmit(TargetData->value, RequestedNumTeams, + IdInterface, HostOpId); + } + } + + /// Invoked when a target submit is executed + void OmptCallbackTargetSubmit(ompt_id_t TargetId, + unsigned int RequestedNumTeams, + IdInterfaceTy IdInterface, + ompt_id_t *HostOpId) { + if (ompt_callback_target_submit_fn) { + *HostOpId = IdInterface(); + ompt_callback_target_submit_fn(TargetId, *HostOpId, RequestedNumTeams); + } + } + + /// Initialize the enabled flag and all the callbacks + void init() { + Enabled = false; +#define initName(Name, Type, Code) Name##_fn = 0; + FOREACH_OMPT_TARGET_CALLBACK(initName) +#undef initName + } + + bool isEnabled() { return Enabled; } + void prepareDevices(int NumDevices) { resize(NumDevices); } + + /// Used to register callbacks. \p Lookup is used to query a give callback + /// and the result is assigned to the callback of this object. + void registerCallbacks(ompt_function_lookup_t Lookup) { + Enabled = true; +#define OmptBindCallback(Name, Type, Code) \ + Name##_fn = (Name##_t)Lookup(#Name); \ + DP("OMPT: class bound %s=%p\n", #Name, ((void *)(uint64_t)Name##_fn)); + + FOREACH_OMPT_TARGET_CALLBACK(OmptBindCallback); +#undef OmptBindCallback + } + + /// Used to find a callback given its name + ompt_interface_fn_t lookupCallback(const char *InterfaceFunctionName) { +#define OmptLookup(Name, Type, Code) \ + if (strcmp(InterfaceFunctionName, #Name) == 0) \ + return (ompt_interface_fn_t)Name##_fn; + + FOREACH_OMPT_TARGET_CALLBACK(OmptLookup); +#undef OmptLookup + return (ompt_interface_fn_t)0; + } + + /// Wrapper function to find a callback given its name + static ompt_interface_fn_t doLookup(const char *InterfaceFunctionName); + +private: + bool Enabled; + +#define DeclareName(Name, Type, Code) Name##_t Name##_fn; + FOREACH_OMPT_TARGET_CALLBACK(DeclareName) +#undef DeclareName + + /// Allocate devices + static void resize(int NumDevices); + /// Find a device given its id + static OmptDeviceTy *lookupDevice(int DeviceNum); + /// Documentation based on omp-tools + static const char *Documentation; +}; + +extern OmptDeviceCallbacksTy ompt_device_callbacks; + +#endif diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -353,7 +353,8 @@ int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); -void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); +void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, + uint64_t loop_tripcount); void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, uint64_t loop_tripcount); diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -69,6 +69,7 @@ impl/system.cpp impl/msgpack.cpp src/rtl.cpp + src/ompt_callback.cpp ${LIBOMPTARGET_EXTRA_SOURCE} ) add_dependencies(omptarget.rtl.amdgpu omptarget.devicertl.amdgpu) diff --git a/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp @@ -0,0 +1,136 @@ +//===------ ompt_callback.cpp - Target RTLs Implementation -------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// OMPT support for AMDGPU +// +//===----------------------------------------------------------------------===// + +//**************************************************************************** +// global includes +//**************************************************************************** + +#include +#include + +#include + +//**************************************************************************** +// debug macro needed by include files +//**************************************************************************** + +#ifndef DEBUG_PREFIX +#define DEBUG_PREFIX "Target AMDGPU RTL" +#endif + +//**************************************************************************** +// local includes +//**************************************************************************** + +#include +#include +#include + +//**************************************************************************** +// macros +//**************************************************************************** + +#define FOREACH_TARGET_FN(macro) + +#define fnptr_to_ptr(x) ((void *)(uint64_t)x) + +//**************************************************************************** +// global data +//**************************************************************************** + +OmptDeviceCallbacksTy ompt_device_callbacks; + +//**************************************************************************** +// private data +//**************************************************************************** + +const char *OmptDeviceCallbacksTy::Documentation = 0; + +static OmptDeviceTy *devices = 0; + +//**************************************************************************** +// private operations +//**************************************************************************** + +void OmptDeviceCallbacksTy::resize(int NumDevices) { + devices = new OmptDeviceTy[NumDevices]; +} + +OmptDeviceTy *OmptDeviceCallbacksTy::lookupDevice(int DeviceNum) { + return &devices[DeviceNum]; +} + +ompt_interface_fn_t +OmptDeviceCallbacksTy::doLookup(const char *InterfaceFunctionName) { +#define macro(fn) \ + if (strcmp(InterfaceFunctionName, #fn) == 0) \ + return (ompt_interface_fn_t)fn; + + FOREACH_TARGET_FN(macro); +#undef macro + return (ompt_interface_fn_t)0; +} + +#ifdef OMPT_SUPPORT + +static bool ompt_enabled = false; +static ompt_get_target_info_t LIBOMPTARGET_GET_TARGET_OPID; + +static int ompt_device_init(ompt_function_lookup_t lookup, + int initial_device_num, ompt_data_t *tool_data) { + DP("OMPT: Enter ompt_device_init\n"); + + ompt_enabled = true; + + LIBOMPTARGET_GET_TARGET_OPID = (ompt_get_target_info_t)lookup( + LIBOMPTARGET_STRINGIFY(LIBOMPTARGET_GET_TARGET_OPID)); + + DP("OMPT: libomptarget_get_target_info = %p\n", + fnptr_to_ptr(LIBOMPTARGET_GET_TARGET_OPID)); + + ompt_device_callbacks.registerCallbacks(lookup); + + DP("OMPT: Exit ompt_device_init\n"); + + return 0; +} + +static void ompt_device_fini(ompt_data_t *tool_data) { + DP("OMPT: executing amdgpu_ompt_device_fini\n"); +} + +//**************************************************************************** +// constructor +//**************************************************************************** +/// Used to initialize callbacks implemented by the tool. This interface will +/// lookup the callbacks table in libomptarget and assign them to the callbacks +/// table maintained in this plugin library. +__attribute__((constructor)) static void ompt_init(void) { + DP("OMPT: Entering ompt_init\n"); + /// Connect with libomptarget + static OmptLibraryConnectorTy LibomptargetConnector("ompt_libomptarget"); + static ompt_start_tool_result_t OmptResult; + + // Initialize OmptResult with the init and fini functions that will be + // called by the connector + OmptResult.initialize = ompt_device_init; + OmptResult.finalize = ompt_device_fini; + OmptResult.tool_data.value = 0; + + // Initialize the device callbacks first + ompt_device_callbacks.init(); + + // Now call connect that causes the above init/fini functions to be called + LibomptargetConnector.connect(&OmptResult); + DP("OMPT: Exiting ompt_init\n"); +} +#endif diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -38,6 +38,18 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" +#ifdef OMPT_SUPPORT +#include +#define OMPT_IF_BUILT_AND_ENABLED(stmts) \ + do { \ + if (ompt_device_callbacks.isEnabled()) { \ + stmts; \ + } \ + } while (0) +#else +#define OMPT_IF_BUILT_AND_ENABLED(stmts) +#endif + // hostrpc interface, FIXME: consider moving to its own include these are // statically linked into amdgpu/plugin if present from hostrpc_services.a, // linked as --whole-archive to override the weak symbols that are used to @@ -702,6 +714,28 @@ return res; } +#ifdef OMPT_SUPPORT + void doOmptDeviceLoad(int32_t DeviceId, const char *FileName, + int64_t OffsetInFile, void *VmaInFile, size_t Bytes, + void *HostAddr, void *DeviceAddr, uint64_t ModuleId) { + ompt_device_callbacks.OmptCallbackDeviceLoad( + DeviceId, FileName, OffsetInFile, VmaInFile, Bytes, HostAddr, + DeviceAddr, ModuleId); + } + + void doOmptDeviceInitialize(int32_t DeviceId, const char *InfoName) { + std::string OmptGPUType("AMD "); + OmptGPUType += InfoName; + ompt_device_callbacks.OmptCallbackDeviceInitialize(DeviceId, + OmptGPUType.c_str()); + } + + void doOmptDeviceFinalize() { + for (int i = 0; i < NumberOfDevices; i++) + ompt_device_callbacks.OmptCallbackDeviceFinalize(i); + } +#endif + RTLDeviceInfoTy() { DP("Start initializing " GETNAME(TARGET_NAME) "\n"); @@ -777,6 +811,13 @@ return; } +#ifdef OMPT_SUPPORT + // TODO ompt_device_callbacks.enabled is not yet set since + // register_callbacks on the plugin instance is not yet + // called. Hence, unconditionally prepare devices. + ompt_device_callbacks.prepareDevices(NumberOfDevices); +#endif + for (int i = 0; i < NumberOfDevices; i++) { uint32_t queue_size = 0; { @@ -828,6 +869,9 @@ // Then none of these can have been set up and they can't be torn down return; } + + OMPT_IF_BUILT_AND_ENABLED(doOmptDeviceFinalize()); + // Run destructors on types that use HSA before // impl_finalize removes access to it deviceStateStore.clear(); @@ -1807,6 +1851,9 @@ DeviceInfo.GroupsPerDevice[device_id] * DeviceInfo.ThreadsPerGroup[device_id]); + OMPT_IF_BUILT_AND_ENABLED( + DeviceInfo.doOmptDeviceInitialize(device_id, GetInfoName)); + return OFFLOAD_SUCCESS; } @@ -1888,7 +1935,6 @@ return NULL; } - err = env.after_loading(); if (err != HSA_STATUS_SUCCESS) { return NULL; @@ -1897,6 +1943,10 @@ DP("AMDGPU module successfully loaded!\n"); + OMPT_IF_BUILT_AND_ENABLED(DeviceInfo.doOmptDeviceLoad( + device_id, nullptr /* FileName */, 0 /* OffsetInFile */, + nullptr /* VmaInFile */, img_size, image->ImageStart, + nullptr /* DeviceAddr */, 0 /* FIXME ModuleId */)); { // the device_State array is either large value in bss or a void* that // needs to be assigned to a pointer to an array of size device_state_bytes diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -18,6 +18,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}/interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/interop.cpp ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ompt_callback.cpp ${CMAKE_CURRENT_SOURCE_DIR}/rtl.cpp ) diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "device.h" +#include "ompt_callback.h" #include "omptarget.h" #include "private.h" #include "rtl.h" @@ -495,10 +496,15 @@ } void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { + OMPT_IF_BUILT(OmptInterfaceTargetDataOpRAII TgtDataAlloc( + RTLDeviceID, Size, HstPtr, nullptr /* TgtPtr */, ompt_target_data_alloc)); return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); } int32_t DeviceTy::deleteData(void *TgtPtrBegin) { + OMPT_IF_BUILT(OmptInterfaceTargetDataOpRAII TgtDataDelete( + RTLDeviceID, 0 /* Size */, nullptr /* HostPtr */, TgtPtrBegin, + ompt_target_data_delete)); return RTL->data_delete(RTLDeviceID, TgtPtrBegin); } @@ -517,12 +523,16 @@ (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str() : "unknown"); } - - if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) - return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); - else - return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, - AsyncInfo); + { + OMPT_IF_BUILT(OmptInterfaceTargetDataOpRAII TargetDataSubmitRAII( + RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin, + ompt_target_data_transfer_to_device)); + if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) + return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); + else + return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, + AsyncInfo); + } } // Retrieve data from device @@ -539,12 +549,16 @@ (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str() : "unknown"); } - - if (!RTL->data_retrieve_async || !RTL->synchronize) - return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); - else - return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, - AsyncInfo); + { + OMPT_IF_BUILT(OmptInterfaceTargetDataOpRAII TargetDataRetrieve( + RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin, + ompt_target_data_transfer_from_device)); + if (!RTL->data_retrieve_async || !RTL->synchronize) + return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); + else + return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, + Size, AsyncInfo); + } } // Copy data from current device to destination device directly diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -52,6 +52,7 @@ __tgt_interop_init; __tgt_interop_use; __tgt_interop_destroy; + ompt_libomptarget_connect; local: *; }; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "device.h" #include "omptarget.h" #include "private.h" @@ -21,6 +23,8 @@ #include #include +#include "ompt_callback.h" + //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t flags) { @@ -93,6 +97,7 @@ TIMESCOPE_WITH_IDENT(loc); DP("Entering data begin region for device %" PRId64 " with %d mappings\n", device_id, arg_num); + if (checkDeviceAndCtors(device_id, loc)) { DP("Not offloading to device %" PRId64 "\n", device_id); return; @@ -112,12 +117,16 @@ } #endif - AsyncInfoTy AsyncInfo(Device); - int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + { + OMPT_IF_BUILT(OmptInterfaceTargetRAII TargetDataBeginRAII( + device_id, ompt_target_enter_data)); + AsyncInfoTy AsyncInfo(Device); + int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_names, arg_mappers, AsyncInfo); + if (rc == OFFLOAD_SUCCESS) + rc = AsyncInfo.synchronize(); + handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + } } EXTERN void __tgt_target_data_begin_nowait_mapper( @@ -181,12 +190,16 @@ } #endif - AsyncInfoTy AsyncInfo(Device); - int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + { + OMPT_IF_BUILT(OmptInterfaceTargetRAII TargetDataEndRAII( + device_id, ompt_target_exit_data)); + AsyncInfoTy AsyncInfo(Device); + int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_names, arg_mappers, AsyncInfo); + if (rc == OFFLOAD_SUCCESS) + rc = AsyncInfo.synchronize(); + handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + } } EXTERN void __tgt_target_data_end_nowait_mapper( @@ -226,6 +239,7 @@ void **arg_mappers) { TIMESCOPE_WITH_IDENT(loc); DP("Entering data update with %d mappings\n", arg_num); + if (checkDeviceAndCtors(device_id, loc)) { DP("Not offloading to device %" PRId64 "\n", device_id); return; @@ -236,12 +250,16 @@ arg_names, "Updating OpenMP data"); DeviceTy &Device = *PM->Devices[device_id]; - AsyncInfoTy AsyncInfo(Device); - int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + { + OMPT_IF_BUILT(OmptInterfaceTargetRAII TargetDataUpdateRAII( + device_id, ompt_target_update)); + AsyncInfoTy AsyncInfo(Device); + int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_names, arg_mappers, AsyncInfo); + if (rc == OFFLOAD_SUCCESS) + rc = AsyncInfo.synchronize(); + handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + } } EXTERN void __tgt_target_data_update_nowait_mapper( @@ -300,14 +318,18 @@ #endif DeviceTy &Device = *PM->Devices[device_id]; - AsyncInfoTy AsyncInfo(Device); - int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, - AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); - assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); + + { + OMPT_IF_BUILT(OmptInterfaceTargetRAII TargetRAII(device_id, ompt_target)); + AsyncInfoTy AsyncInfo(Device); + int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, + arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, + AsyncInfo); + if (rc == OFFLOAD_SUCCESS) + rc = AsyncInfo.synchronize(); + handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); + } return OMP_TGT_SUCCESS; } @@ -374,15 +396,19 @@ #endif DeviceTy &Device = *PM->Devices[device_id]; - AsyncInfoTy AsyncInfo(Device); - int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, team_num, thread_limit, - true /*team*/, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); - assert(rc == OFFLOAD_SUCCESS && - "__tgt_target_teams_mapper unexpected failure!"); + + { + OMPT_IF_BUILT(OmptInterfaceTargetRAII TargetRAII(device_id, ompt_target)); + AsyncInfoTy AsyncInfo(Device); + int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, + arg_types, arg_names, arg_mappers, team_num, thread_limit, + true /*team*/, AsyncInfo); + if (rc == OFFLOAD_SUCCESS) + rc = AsyncInfo.synchronize(); + handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + assert(rc == OFFLOAD_SUCCESS && + "__tgt_target_teams_mapper unexpected failure!"); + } return OMP_TGT_SUCCESS; } @@ -424,9 +450,9 @@ MapComponentInfoTy(base, begin, size, type, name)); } -EXTERN void __kmpc_push_target_tripcount(int64_t device_id, +EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id, uint64_t loop_tripcount) { - __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount); + __kmpc_push_target_tripcount_mapper(loc, device_id, loop_tripcount); } EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id, diff --git a/openmp/libomptarget/src/ompt_callback.h b/openmp/libomptarget/src/ompt_callback.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/ompt_callback.h @@ -0,0 +1,259 @@ +//===----------- device.h - Target independent OpenMP target RTL ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declarations for OpenMP Tool callback dispatchers +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_OMPT_CALLBACK_H +#define _OMPTARGET_OMPT_CALLBACK_H + +// If target OMPT support is compiled in +#ifdef OMPT_SUPPORT +#define OMPT_IF_BUILT(stmt) stmt +#else +#define OMPT_IF_BUILT(stmt) +#endif + +#if (__PPC64__ | __arm__) +#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) +#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_cfa +#else +#define OMPT_GET_FRAME_ADDRESS(level) __builtin_frame_address(level) +#define OMPT_FRAME_POSITION_DEFAULT ompt_frame_framepointer +#endif + +#define OMPT_FRAME_FLAGS (ompt_frame_runtime | OMPT_FRAME_POSITION_DEFAULT) +#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) + +#include + +/// Used to maintain execution state for this thread +class OmptInterface { +public: + OmptInterface() + : EnterFrame(NULL), Codeptr(NULL), InterfaceState(ompt_state_idle) {} + + void setState(void *Frame, void *Codeptr); + + void clearState(); + + // target op callbacks + void beginTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size, + void *Code); + + void endTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size, + void *Code); + + void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code); + + void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code); + + void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + void beginTargetSubmit(unsigned int NumTeams = 1); + + void endTargetSubmit(unsigned int NumTeams = 1); + + // target region callbacks + void beginTargetDataEnter(int64_t DeviceId, void *Code); + + void endTargetDataEnter(int64_t DeviceId, void *Code); + + void beginTargetDataExit(int64_t DeviceId, void *Code); + + void endTargetDataExit(int64_t DeviceId, void *Code); + + void beginTargetUpdate(int64_t DeviceId, void *Code); + + void endTargetUpdate(int64_t DeviceId, void *Code); + + void beginTarget(int64_t DeviceId, void *Code); + + void endTarget(int64_t DeviceId, void *Code); + +private: + void setStateHelper(void *Frame, void *Code, int Flags, int State); + + // begin/end target op marks + void beginTargetOperation(); + + void endTargetOperation(); + + // begin/end target region marks + void beginTargetRegion(); + + void endTargetRegion(); + + void announceTargetRegion(const char *Name); + +private: + void *EnterFrame; + void *Codeptr; + int InterfaceState; +}; + +extern thread_local OmptInterface ompt_interface; +extern bool ompt_enabled; + +// RAII objects used by the runtime entry points +struct OmptInterfaceTargetRAII { + OmptInterfaceTargetRAII(int64_t Id, ompt_target_t Op) + : CodePtr{nullptr}, DeviceId{Id}, TgtOp{Op} { + if (!ompt_enabled) + return; + CodePtr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.setState(OMPT_GET_FRAME_ADDRESS(0), CodePtr); + switch (TgtOp) { + case ompt_target_enter_data: + case ompt_target_enter_data_nowait: + ompt_interface.beginTargetDataEnter(DeviceId, CodePtr); + break; + case ompt_target_exit_data: + case ompt_target_exit_data_nowait: + ompt_interface.beginTargetDataExit(DeviceId, CodePtr); + break; + case ompt_target_update: + case ompt_target_update_nowait: + ompt_interface.beginTargetUpdate(DeviceId, CodePtr); + break; + case ompt_target: + case ompt_target_nowait: + ompt_interface.beginTarget(DeviceId, CodePtr); + break; + } + } + ~OmptInterfaceTargetRAII() { + if (!ompt_enabled) + return; + switch (TgtOp) { + case ompt_target_enter_data: + case ompt_target_enter_data_nowait: + ompt_interface.endTargetDataEnter(DeviceId, CodePtr); + break; + case ompt_target_exit_data: + case ompt_target_exit_data_nowait: + ompt_interface.endTargetDataExit(DeviceId, CodePtr); + break; + case ompt_target_update: + case ompt_target_update_nowait: + ompt_interface.endTargetUpdate(DeviceId, CodePtr); + break; + case ompt_target: + case ompt_target_nowait: + ompt_interface.endTarget(DeviceId, CodePtr); + break; + } + ompt_interface.clearState(); + } + +private: + void *CodePtr; + int64_t DeviceId; + ompt_target_t TgtOp; +}; + +struct OmptInterfaceTargetSubmitRAII { + OmptInterfaceTargetSubmitRAII(int32_t Teams) : NumTeams{Teams} { + if (!ompt_enabled) + return; + ompt_interface.setState(OMPT_GET_FRAME_ADDRESS(0), + OMPT_GET_RETURN_ADDRESS(0)); + ompt_interface.beginTargetSubmit(NumTeams); + } + ~OmptInterfaceTargetSubmitRAII() { + if (!ompt_enabled) + return; + ompt_interface.endTargetSubmit(NumTeams); + ompt_interface.clearState(); + } + +private: + int32_t NumTeams; +}; + +struct OmptInterfaceTargetDataOpRAII { + OmptInterfaceTargetDataOpRAII(int32_t Id, int64_t Sz, void *HPtr, void *TPtr, + ompt_target_data_op_t Op) + : DeviceId{Id}, Size{Sz}, CodePtr{nullptr}, HostPtr{HPtr}, TgtPtr{TPtr}, + TgtDataOp{Op} { + if (!ompt_enabled) + return; + CodePtr = OMPT_GET_RETURN_ADDRESS(0); + ompt_interface.setState(OMPT_GET_FRAME_ADDRESS(0), CodePtr); + switch (TgtDataOp) { + case ompt_target_data_alloc: + case ompt_target_data_alloc_async: + ompt_interface.beginTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr); + break; + case ompt_target_data_delete: + case ompt_target_data_delete_async: + ompt_interface.beginTargetDataDelete(DeviceId, TgtPtr, CodePtr); + break; + case ompt_target_data_transfer_to_device: + case ompt_target_data_transfer_to_device_async: + ompt_interface.beginTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size, + CodePtr); + break; + case ompt_target_data_transfer_from_device: + case ompt_target_data_transfer_from_device_async: + ompt_interface.beginTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size, + CodePtr); + break; + default: + break; + } + } + ~OmptInterfaceTargetDataOpRAII() { + if (!ompt_enabled) + return; + switch (TgtDataOp) { + case ompt_target_data_alloc: + case ompt_target_data_alloc_async: + ompt_interface.endTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr); + break; + case ompt_target_data_delete: + case ompt_target_data_delete_async: + ompt_interface.endTargetDataDelete(DeviceId, TgtPtr, CodePtr); + break; + case ompt_target_data_transfer_to_device: + case ompt_target_data_transfer_to_device_async: + ompt_interface.endTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size, + CodePtr); + break; + case ompt_target_data_transfer_from_device: + case ompt_target_data_transfer_from_device_async: + ompt_interface.endTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size, + CodePtr); + break; + default: + break; + } + ompt_interface.clearState(); + } + +private: + int32_t DeviceId; + int64_t Size; + void *CodePtr; + void *HostPtr; + void *TgtPtr; + ompt_target_data_op_t TgtDataOp; +}; + +#endif // _OMPTARGET_OMPT_CALLBACK_H diff --git a/openmp/libomptarget/src/ompt_callback.cpp b/openmp/libomptarget/src/ompt_callback.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/ompt_callback.cpp @@ -0,0 +1,427 @@ +//===-- ompt_callback.cpp - Target independent OpenMP target RTL -- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of OMPT callback interfaces for target independent layer +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +//**************************************************************************** +// local include files +//**************************************************************************** + +#include + +#include "ompt_callback.h" +#include "private.h" + +#include +#include + +/************************************************************************* + * macros + ************************************************************************/ + +#define fnptr_to_ptr(x) ((void *)(uint64_t)x) + +/***************************************************************************** + * global data + *****************************************************************************/ + +/// Used to indicate whether OMPT was enabled for this library +bool ompt_enabled = false; +/// Object maintaining all the callbacks for this library +OmptDeviceCallbacksTy ompt_device_callbacks; +/// Thread local state during execution for target region and associated +/// metadata +thread_local OmptInterface ompt_interface; + +namespace { + +/******************************************************************************* + * type declarations + *******************************************************************************/ + +/// Used to maintain the init and fini functions +class LibomptargetRtlFinalizer { +public: + LibomptargetRtlFinalizer() : Fn(nullptr) {} + void registerRtl(ompt_finalize_t _fn) { + assert(Fn == nullptr && "Finalizer already registered"); + Fn = _fn; + } + void finalize() { + if (Fn) + Fn(NULL); + Fn = nullptr; + } + +private: + ompt_finalize_t Fn; +}; + +typedef int (*ompt_set_frame_enter_t)(void *addr, int flags, int state); +typedef ompt_data_t *(*ompt_get_task_data_t)(); +typedef ompt_data_t *(*ompt_get_target_task_data_t)(); + +} // namespace + +/***************************************************************************** + * private data + *****************************************************************************/ + +static ompt_set_frame_enter_t ompt_set_frame_enter_fn = nullptr; +static ompt_get_task_data_t ompt_get_task_data_fn = nullptr; +static ompt_get_target_task_data_t ompt_get_target_task_data_fn = nullptr; + +static LibomptargetRtlFinalizer libomptarget_rtl_finalizer; + +static std::atomic unique_id_ticket(1); + +/***************************************************************************** + * Thread local data + *****************************************************************************/ + +/// Correlation id that is incremented with target operations +static thread_local uint64_t ompt_target_region_opid = 1; +// The following are thread local data structures that are passed to callbacks +static thread_local ompt_data_t ompt_target_data = ompt_data_none; +static thread_local ompt_data_t *ompt_task_data = nullptr; +static thread_local ompt_data_t *ompt_target_task_data = nullptr; +static thread_local ompt_id_t host_op_id = 0; + +/***************************************************************************** + * OMPT private operations + *****************************************************************************/ + +/// Used to create a new correlation id +static uint64_t id_create() { return unique_id_ticket.fetch_add(1); } + +static uint64_t opid_create() { + host_op_id = id_create(); + return host_op_id; +} + +static uint64_t opid_get() { return host_op_id; } + +static uint64_t regionid_create() { + ompt_target_data.value = id_create(); + return ompt_target_data.value; +} + +static uint64_t regionid_get() { return ompt_target_data.value; } + +static void LIBOMPTARGET_GET_TARGET_OPID(uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id) { + *host_op_id = ompt_target_region_opid; +} + +/***************************************************************************** + * OMPT callbacks + *****************************************************************************/ + +void OmptInterface::setStateHelper(void *Frame, void *Code, int Flags, + int State) { + EnterFrame = Frame; + Codeptr = Code; + if (ompt_set_frame_enter_fn) + InterfaceState = ompt_set_frame_enter_fn(Frame, Flags, State); +} + +void OmptInterface::setState(void *Frame, void *Codeptr) { + setStateHelper(Frame, Codeptr, OMPT_FRAME_FLAGS, ompt_state_work_parallel); +} + +void OmptInterface::clearState() { + setStateHelper(nullptr /* Frame */, nullptr /* Code */, 0 /* Flags */, + InterfaceState); +} + +void OmptInterface::beginTargetRegion() { + // set up task region state + ompt_task_data = ompt_get_task_data_fn(); + ompt_target_task_data = ompt_get_target_task_data_fn(); + + *ompt_task_data = ompt_data_none; + *ompt_target_task_data = ompt_data_none; + ompt_target_data = ompt_data_none; +} + +void OmptInterface::endTargetRegion() { + ompt_task_data = 0; + ompt_target_task_data = 0; + ompt_target_data = ompt_data_none; +} + +void OmptInterface::announceTargetRegion(const char *Name) { + DP("in OmptInterface::target_region_%s target_id=%lu\n", Name, + ompt_target_data.value); +} + +void OmptInterface::beginTargetOperation() { + DP("in ompt_target_region_begin (ompt_target_region_opid = %lu)\n", + ompt_target_data.value); +} + +void OmptInterface::endTargetOperation() { + DP("in ompt_target_region_end (ompt_target_region_opid = %lu)\n", + ompt_target_data.value); +} + +/***************************************************************************** + * OMPT public operations + *****************************************************************************/ + +// FIXME: optional implementation of target map? + +void OmptInterface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, + size_t Size, void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_alloc, HstPtrBegin, DeviceId, NULL, 0, Size, Code, + opid_create, &ompt_target_region_opid); + beginTargetOperation(); +} + +void OmptInterface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, + size_t Size, void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_alloc, HstPtrBegin, DeviceId, NULL, 0, Size, Code, + opid_get, &ompt_target_region_opid); + endTargetOperation(); +} + +void OmptInterface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin, + void *HstPtrBegin, size_t Size, + void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_to_device, HstPtrBegin, 0, TgtPtrBegin, + DeviceId, Size, Code, opid_create, &ompt_target_region_opid); + beginTargetOperation(); +} + +void OmptInterface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin, + void *HstPtrBegin, size_t Size, + void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_to_device, HstPtrBegin, 0 /* DeviceId */, + TgtPtrBegin, DeviceId, Size, Code, opid_get, &ompt_target_region_opid); + endTargetOperation(); +} + +void OmptInterface::beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, + void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_delete, TgtPtrBegin, DeviceId, NULL /* DstAddr */, + 0 /* DestDeviceNum */, 0 /* Bytes */, Code, opid_create, + &ompt_target_region_opid); + beginTargetOperation(); +} + +void OmptInterface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, + void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_delete, TgtPtrBegin, DeviceId, NULL /* DstAddr */, + 0 /* DestDeviceNum */, 0 /* Bytes */, Code, opid_get, + &ompt_target_region_opid); + endTargetOperation(); +} + +void OmptInterface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, + void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_begin, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId, HstPtrBegin, + 0 /* DeviceId */, Size, Code, opid_create, &ompt_target_region_opid); + beginTargetOperation(); +} + +void OmptInterface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, + void *Code) { + ompt_device_callbacks.OmptCallbackTargetDataOpEmi( + ompt_scope_end, ompt_target_task_data, &ompt_target_data, + ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId, HstPtrBegin, + 0 /* Device Id */, Size, Code, opid_get, &ompt_target_region_opid); + endTargetOperation(); +} + +void OmptInterface::beginTargetSubmit(unsigned int numTeams) { + ompt_device_callbacks.OmptCallbackTargetSubmitEmi( + ompt_scope_begin, &ompt_target_data, numTeams, opid_create, + &ompt_target_region_opid); +} + +void OmptInterface::endTargetSubmit(unsigned int numTeams) { + ompt_device_callbacks.OmptCallbackTargetSubmitEmi( + ompt_scope_end, &ompt_target_data, numTeams, opid_get, + &ompt_target_region_opid); +} + +void OmptInterface::beginTargetDataEnter(int64_t DeviceId, void *Code) { + beginTargetRegion(); + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target_enter_data, ompt_scope_begin, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_create); +} + +void OmptInterface::endTargetDataEnter(int64_t DeviceId, void *Code) { + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target_enter_data, ompt_scope_end, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_get); + endTargetRegion(); +} + +void OmptInterface::beginTargetDataExit(int64_t DeviceId, void *Code) { + beginTargetRegion(); + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target_exit_data, ompt_scope_begin, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_create); + announceTargetRegion("begin"); +} + +void OmptInterface::endTargetDataExit(int64_t DeviceId, void *Code) { + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target_exit_data, ompt_scope_end, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_get); + endTargetRegion(); +} + +void OmptInterface::beginTargetUpdate(int64_t DeviceId, void *Code) { + beginTargetRegion(); + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target_update, ompt_scope_begin, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_create); + announceTargetRegion("begin"); +} + +void OmptInterface::endTargetUpdate(int64_t DeviceId, void *Code) { + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target_update, ompt_scope_end, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_get); + endTargetRegion(); +} + +void OmptInterface::beginTarget(int64_t DeviceId, void *Code) { + beginTargetRegion(); + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target, ompt_scope_begin, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_create); + announceTargetRegion("begin"); +} + +void OmptInterface::endTarget(int64_t DeviceId, void *Code) { + ompt_device_callbacks.OmptCallbackTargetEmi( + ompt_target, ompt_scope_end, DeviceId, ompt_task_data, + ompt_target_task_data, &ompt_target_data, Code, regionid_get); + endTargetRegion(); +} + +/***************************************************************************** + * OMPT device callback operations specific to libomptarget + *****************************************************************************/ + +const char *OmptDeviceCallbacksTy::Documentation = 0; + +// Today, this is not called from libomptarget +OmptDeviceTy *OmptDeviceCallbacksTy::lookupDevice(int DeviceNnum) { + DP("Lookup device should be invoked in the plugin\n"); + abort(); + return nullptr; +} + +ompt_interface_fn_t +OmptDeviceCallbacksTy::doLookup(const char *InterfaceFunctionName) { + if (strcmp(InterfaceFunctionName, + LIBOMPTARGET_STRINGIFY(LIBOMPTARGET_GET_TARGET_OPID)) == 0) + return (ompt_interface_fn_t)LIBOMPTARGET_GET_TARGET_OPID; + + return ompt_device_callbacks.lookupCallback(InterfaceFunctionName); +} + +#ifdef OMPT_SUPPORT +static int ompt_libomptarget_initialize(ompt_function_lookup_t lookup, + int initial_device_num, + ompt_data_t *tool_data) { + DP("enter ompt_libomptarget_initialize!\n"); + ompt_enabled = true; + +#define ompt_bind_name(fn) \ + fn##_fn = (fn##_t)lookup(#fn); \ + DP("%s=%p\n", #fn, fnptr_to_ptr(fn##_fn)); + + ompt_bind_name(ompt_set_frame_enter); + ompt_bind_name(ompt_get_task_data); + ompt_bind_name(ompt_get_target_task_data); +#undef ompt_bind_name + + ompt_device_callbacks.registerCallbacks(lookup); + DP("exit ompt_libomptarget_initialize!\n"); + return 0; +} + +static void ompt_libomptarget_finalize(ompt_data_t *data) { + DP("enter ompt_libomptarget_finalize!\n"); + libomptarget_rtl_finalizer.finalize(); + ompt_enabled = false; + DP("exit ompt_libomptarget_finalize!\n"); +} + +/***************************************************************************** + * constructor + *****************************************************************************/ +/// Used to initialize callbacks implemented by the tool. This interface +/// will lookup the callbacks table in libomp and assign them to the callbacks +/// maintained in libomptarget. +__attribute__((constructor(102))) static void ompt_init(void) { + /// Connect with libomp + static OmptLibraryConnectorTy LibompConnector("ompt_libomp"); + static ompt_start_tool_result_t OmptResult; + + // Initialize OmptResult with the init and fini functions that will be + // called by the connector + OmptResult.initialize = ompt_libomptarget_initialize; + OmptResult.finalize = ompt_libomptarget_finalize; + OmptResult.tool_data.value = 0; + + // Initialize the device callbacks first + ompt_device_callbacks.init(); + + // Now call connect that causes the above init/fini functions to be called + LibompConnector.connect(&OmptResult); + DP("OMPT: Exit ompt_init\n"); +} +#endif + +extern "C" { +/// Used for connecting libomptarget with a plugin +void ompt_libomptarget_connect(ompt_start_tool_result_t *result) { + DP("OMPT: Enter libomptarget_ompt_connect\n"); + if (ompt_enabled && result) { + // Cache the fini function so that it can be invoked on exit + libomptarget_rtl_finalizer.registerRtl(result->finalize); + // Invoke the provided init function with the lookup function maintained + // in this library so that callbacks maintained by this library are obtained + result->initialize(OmptDeviceCallbacksTy::doLookup, + 0 /* initial_device_num */, nullptr /* tool_data */); + } + DP("OMPT: Leave libomptarget_ompt_connect\n"); +} +} diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -20,6 +20,8 @@ #include #include +#include "ompt_callback.h" + int AsyncInfoTy::synchronize() { int Result = OFFLOAD_SUCCESS; if (AsyncInfo.Queue) { @@ -1544,6 +1546,9 @@ { TIMESCOPE_WITH_NAME_AND_IDENT( IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", loc); + + OMPT_IF_BUILT(OmptInterfaceTargetSubmitRAII TargetSubmitRAII(TeamNum)); + if (IsTeamConstruct) Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0], TgtArgs.size(), TeamNum, ThreadLimit, diff --git a/openmp/libomptarget/test/lit.cfg b/openmp/libomptarget/test/lit.cfg --- a/openmp/libomptarget/test/lit.cfg +++ b/openmp/libomptarget/test/lit.cfg @@ -64,6 +64,9 @@ if config.libomptarget_debug: config.available_features.add('libomptarget-debug') +if config.has_libomptarget_ompt: + config.available_features.add('ompt') + config.available_features.add(config.libomptarget_current_target) # Determine whether the test system supports unified memory. diff --git a/openmp/libomptarget/test/lit.site.cfg.in b/openmp/libomptarget/test/lit.site.cfg.in --- a/openmp/libomptarget/test/lit.site.cfg.in +++ b/openmp/libomptarget/test/lit.site.cfg.in @@ -18,6 +18,7 @@ config.libomptarget_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" config.libomptarget_not = "@OPENMP_NOT_EXECUTABLE@" config.libomptarget_debug = @LIBOMPTARGET_DEBUG@ +config.has_libomptarget_ompt = @LIBOMPTARGET_OMPT_SUPPORT@ # Let the main config do the real work. lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/callbacks.h @@ -0,0 +1,129 @@ +#include +#include + +// Tool related code below +#include + +// For EMI callbacks +ompt_id_t next_op_id = 0x8000000000000001; + +// OMPT callbacks + +// Synchronous callbacks +static void on_ompt_callback_device_initialize(int device_num, const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation) { + printf("Callback Init: device_num=%d type=%s device=%p lookup=%p doc=%p\n", + device_num, type, device, lookup, documentation); +} + +static void on_ompt_callback_device_finalize(int device_num) { + printf("Callback Fini: device_num=%d\n", device_num); +} + +static void on_ompt_callback_device_load(int device_num, const char *filename, + int64_t offset_in_file, + void *vma_in_file, size_t bytes, + void *host_addr, void *device_addr, + uint64_t module_id) { + printf("Callback Load: device_num:%d module_id:%lu filename:%s host_adddr:%p " + "device_addr:%p bytes:%lu\n", + device_num, module_id, filename, host_addr, device_addr, bytes); +} + +static void on_ompt_callback_target_data_op( + ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, + void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, + size_t bytes, const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + // Both src and dest must not be null + assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null"); + printf(" Callback DataOp: target_id=%lu host_op_id=%lu optype=%d src=%p " + "src_device_num=%d " + "dest=%p dest_device_num=%d bytes=%lu code=%p\n", + target_id, host_op_id, optype, src_addr, src_device_num, dest_addr, + dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + printf("Callback Target: target_id=%lu kind=%d endpoint=%d device_num=%d " + "code=%p\n", + target_id, kind, endpoint, device_num, codeptr_ra); +} + +static void on_ompt_callback_target_submit(ompt_id_t target_id, + ompt_id_t host_op_id, + unsigned int requested_num_teams) { + printf(" Callback Submit: target_id=%lu host_op_id=%lu req_num_teams=%d\n", + target_id, host_op_id, requested_num_teams); +} + +static void on_ompt_callback_target_map(ompt_id_t target_id, + unsigned int nitems, void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + printf("Target map callback is unimplemented\n"); + abort(); +} + +static void on_ompt_callback_target_data_op_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data, + ompt_data_t *target_data, ompt_id_t *host_op_id, + ompt_target_data_op_t optype, void *src_addr, int src_device_num, + void *dest_addr, int dest_device_num, size_t bytes, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + // Both src and dest must not be null + assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null"); + if (endpoint == ompt_scope_begin) + *host_op_id = next_op_id++; + printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p " + "(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p " + "src_device_num=%d " + "dest=%p dest_device_num=%d bytes=%lu code=%p\n", + endpoint, optype, target_task_data, target_task_data->value, + target_data, target_data->value, host_op_id, *host_op_id, src_addr, + src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target_emi(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_data_t *target_task_data, + ompt_data_t *target_data, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + if (endpoint == ompt_scope_begin) + target_data->value = next_op_id++; + printf("Callback Target EMI: kind=%d endpoint=%d device_num=%d task_data=%p " + "(0x%lx) target_task_data=%p (0x%lx) target_data=%p (0x%lx) code=%p\n", + kind, endpoint, device_num, task_data, task_data->value, + target_task_data, target_task_data->value, target_data, + target_data->value, codeptr_ra); +} + +static void on_ompt_callback_target_submit_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_data, + ompt_id_t *host_op_id, unsigned int requested_num_teams) { + printf(" Callback Submit EMI: endpoint=%d req_num_teams=%d target_data=%p " + "(0x%lx) host_op_id=%p (0x%lx)\n", + endpoint, requested_num_teams, target_data, target_data->value, + host_op_id, *host_op_id); +} + +static void on_ompt_callback_target_map_emi(ompt_data_t *target_data, + unsigned int nitems, + void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + printf("Target map emi callback is unimplemented\n"); + abort(); +} diff --git a/openmp/libomptarget/test/ompt/register_both.h b/openmp/libomptarget/test/ompt/register_both.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_both.h @@ -0,0 +1,49 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_emi.h b/openmp/libomptarget/test/ompt/register_emi.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_emi.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit_emi); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_emi_map.h b/openmp/libomptarget/test/ompt/register_emi_map.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_emi_map.h @@ -0,0 +1,48 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit_emi); + register_ompt_callback(ompt_callback_target_map_emi); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_no_device_init.h b/openmp/libomptarget/test/ompt/register_no_device_init.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_no_device_init.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + // If no device init callback is registered, the other callbacks won't be + // activated. + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_non_emi.h b/openmp/libomptarget/test/ompt/register_non_emi.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_non_emi.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_non_emi_map.h b/openmp/libomptarget/test/ompt/register_non_emi_map.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_non_emi_map.h @@ -0,0 +1,48 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 0; // failed + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + register_ompt_callback(ompt_callback_target_map); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/register_wrong_return.h b/openmp/libomptarget/test/ompt/register_wrong_return.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/register_wrong_return.h @@ -0,0 +1,47 @@ +#include + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +// OMPT entry point handles +static ompt_set_callback_t ompt_set_callback = 0; + +// Init functions +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + + if (!ompt_set_callback) + return 1; // failed but wrongly returning 1 + + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + + return 0; // success but should return 1 according to the spec +} + +void ompt_finalize(ompt_data_t *tool_data) {} + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize, + &ompt_finalize, 0}; + return &ompt_start_tool_result; +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/libomptarget/test/ompt/veccopy.c b/openmp/libomptarget/test/ompt/veccopy.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy.c @@ -0,0 +1,82 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newDriver +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-newDriver + +/* + * Example OpenMP program that registers non-EMI callbacks + */ + +#include +#include + +#include "callbacks.h" +#include "register_non_emi.h" + +int main() { + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + +#pragma omp target teams distribute parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + + int rc = 0; + for (i = 0; i < N; i++) + if (a[i] != b[i]) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 + +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c @@ -0,0 +1,100 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newDriver +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-newDriver + +/* + * Example OpenMP program that shows that both EMI and non-EMI + * callbacks cannot be registered for the same type. In the + * current implementation, the EMI callback overrides the non-EMI + * callback. + */ + +#include +#include + +#include "callbacks.h" +#include "register_both.h" + +int main() { + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + +#pragma omp target teams distribute parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + + int rc = 0; + for (i = 0; i < N; i++) + if (a[i] != b[i]) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_emi.c b/openmp/libomptarget/test/ompt/veccopy_emi.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy_emi.c @@ -0,0 +1,100 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newDriver +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-newDriver + +/* + * Example OpenMP program that registers EMI callbacks + */ + +#include +#include +#include + +#include "callbacks.h" +#include "register_emi.h" + +int main() { + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + +#pragma omp target teams distribute parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + + int rc = 0; + for (i = 0; i < N; i++) + if (a[i] != b[i]) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=0 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_emi_map.c b/openmp/libomptarget/test/ompt/veccopy_emi_map.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy_emi_map.c @@ -0,0 +1,101 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newDriver +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-newDriver + +/* + * Example OpenMP program that shows that map-EMI callbacks are not supported. + */ + +#include +#include +#include + +#include "callbacks.h" +#include "register_emi_map.h" + +int main() { + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + +#pragma omp target teams distribute parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + + int rc = 0; + for (i = 0; i < N; i++) + if (a[i] != b[i]) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK: 0: Could not register callback 'ompt_callback_target_map_emi' +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=0 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_map.c b/openmp/libomptarget/test/ompt/veccopy_map.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy_map.c @@ -0,0 +1,83 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newDriver +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-newDriver + +/* + * Example OpenMP program that shows that map callbacks are not supported. + */ + +#include +#include + +#include "callbacks.h" +#include "register_non_emi_map.h" + +int main() { + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + +#pragma omp target teams distribute parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + + int rc = 0; + for (i = 0; i < N; i++) + if (a[i] != b[i]) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK: 0: Could not register callback 'ompt_callback_target_map' +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 + +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_no_device_init.c b/openmp/libomptarget/test/ompt/veccopy_no_device_init.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy_no_device_init.c @@ -0,0 +1,83 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newDriver +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-newDriver + +/* + * Example OpenMP program that shows that if no device init callback + * is registered, the other callbacks won't be activated. + */ + +#include +#include + +#include "callbacks.h" +#include "register_no_device_init.h" + +int main() { + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + +#pragma omp target teams distribute parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + + int rc = 0; + for (i = 0; i < N; i++) + if (a[i] != b[i]) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK-NOT: Callback Init: +/// CHECK-NOT: Callback Load: +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 + +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK-NOT: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_wrong_return.c b/openmp/libomptarget/test/ompt/veccopy_wrong_return.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy_wrong_return.c @@ -0,0 +1,83 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-newDriver +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-newDriver + +/* + * Example OpenMP program that shows that if the initialize function + * returns the wrong status code, the callbacks won't be activated. + */ + +#include +#include + +#include "callbacks.h" +#include "register_wrong_return.h" + +int main() { + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + +#pragma omp target teams distribute parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } + + int rc = 0; + for (i = 0; i < N; i++) + if (a[i] != b[i]) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK-NOT: Callback Init: +/// CHECK-NOT: Callback Load: +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 + +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK-NOT: Callback Fini: diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -330,6 +330,8 @@ endif() endif() +set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE) + # Check if HWLOC support is available if(${LIBOMP_USE_HWLOC}) set(CMAKE_REQUIRED_INCLUDES ${LIBOMP_HWLOC_INSTALL_DIR}/include) diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt --- a/openmp/runtime/src/exports_so.txt +++ b/openmp/runtime/src/exports_so.txt @@ -26,6 +26,7 @@ # OMPT API # ompt_start_tool; # OMPT start interface + ompt_libomp_connect; # OMPT libomptarget interface ompc_*; # omp.h renames some standard functions to ompc_*. kmp_*; # Intel extensions. diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var --- a/openmp/runtime/src/include/omp-tools.h.var +++ b/openmp/runtime/src/include/omp-tools.h.var @@ -108,7 +108,7 @@ macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ -#define FOREACH_OMPT_EVENT(macro) \ +#define FOREACH_OMPT_HOST_EVENT(macro) \ \ /*--- Mandatory Events ---*/ \ macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ @@ -121,17 +121,9 @@ macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ \ - macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ - macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ - macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ \ macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ \ - macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ - macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ - \ - macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ - macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ \ /* Optional Events */ \ macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \ @@ -145,7 +137,6 @@ \ macro (ompt_callback_masked, ompt_callback_masked_t, 21) /* task at masked begin or end */ \ \ - macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ \ macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ \ @@ -164,11 +155,48 @@ macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \ \ macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ \ + macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */ + + +#define FOREACH_OMPT_DEVICE_EVENT(macro) \ + macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ + macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ + \ + macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ + macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ + + +#define FOREACH_OMPT_NOEMI_EVENT(macro) \ + macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ + macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ + macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ + macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ + + +#define FOREACH_OMPT_EMI_EVENT(macro) \ macro (ompt_callback_target_emi, ompt_callback_target_emi_t, 33) /* target */ \ macro (ompt_callback_target_data_op_emi,ompt_callback_target_data_op_emi_t,34) /* target data op */ \ macro (ompt_callback_target_submit_emi, ompt_callback_target_submit_emi_t, 35) /* target submit */ \ macro (ompt_callback_target_map_emi, ompt_callback_target_map_emi_t, 36) /* target map */ \ - macro (ompt_callback_error, ompt_callback_error_t, 37) /* error */ + +#define FOREACH_OMPT_50_TARGET_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) + +#define FOREACH_OMPT_51_TARGET_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +#define FOREACH_OMPT_EVENT(macro) \ + FOREACH_OMPT_HOST_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +#define FOREACH_OMPT_51_EVENT(macro) \ + FOREACH_OMPT_HOST_EVENT(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) /***************************************************************************** * implementation specific types diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -55,19 +55,19 @@ #define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_emi_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_target_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_emi_implemented ompt_event_MAY_ALWAYS #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_initialize_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_device_finalize_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_device_load_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_load_implemented ompt_event_MAY_ALWAYS #define ompt_callback_device_unload_implemented ompt_event_UNIMPLEMENTED /*---------------------------------------------------------------------------- diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -109,6 +109,7 @@ static void *ompt_tool_module = NULL; #define OMPT_DLCLOSE(Lib) dlclose(Lib) #endif +static ompt_start_tool_result_t *libomptarget_ompt_result = NULL; /***************************************************************************** * forward declarations @@ -456,7 +457,7 @@ if (verbose_init && verbose_file != stderr && verbose_file != stdout) fclose(verbose_file); #if OMPT_DEBUG - printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); + printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled.enabled); #endif } @@ -495,8 +496,8 @@ ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( ompt_thread_initial, __ompt_get_thread_data_internal()); } - ompt_data_t *task_data; - ompt_data_t *parallel_data; + ompt_data_t *task_data = nullptr; + ompt_data_t *parallel_data = nullptr; __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL); if (ompt_enabled.ompt_callback_implicit_task) { @@ -509,12 +510,13 @@ } void ompt_fini() { - if (ompt_enabled.enabled -#if OMPD_SUPPORT - && ompt_start_tool_result && ompt_start_tool_result->finalize -#endif - ) { - ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); + if (ompt_enabled.enabled) { + if (ompt_start_tool_result && ompt_start_tool_result->finalize) { + ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); + } + if (libomptarget_ompt_result && libomptarget_ompt_result->finalize) { + libomptarget_ompt_result->finalize(NULL); + } } if (ompt_tool_module) @@ -869,5 +871,57 @@ FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) - return NULL; +#undef ompt_interface_fn + + return (ompt_interface_fn_t)0; +} + +static int ompt_set_frame_enter(void *addr, int flags, int state) { + return __ompt_set_frame_enter_internal(addr, flags, state); +} + +static ompt_data_t *ompt_get_task_data() { return __ompt_get_task_data(); } + +static ompt_data_t *ompt_get_target_task_data() { + return __ompt_get_target_task_data(); +} + +static ompt_interface_fn_t ompt_libomp_target_fn_lookup(const char *s) { +#define provide_fn(fn) \ + if (strcmp(s, #fn) == 0) \ + return (ompt_interface_fn_t)fn; + + provide_fn(ompt_set_frame_enter); + provide_fn(ompt_get_task_data); + provide_fn(ompt_get_target_task_data); + +#define ompt_interface_fn(fn, type, code) \ + if (strcmp(s, #fn) == 0) \ + return (ompt_interface_fn_t)ompt_callbacks.ompt_callback(fn); + + FOREACH_OMPT_DEVICE_EVENT(ompt_interface_fn) + FOREACH_OMPT_EMI_EVENT(ompt_interface_fn) + FOREACH_OMPT_NOEMI_EVENT(ompt_interface_fn) + +#undef ompt_interface_fn + + return (ompt_interface_fn_t)0; +} + +_OMP_EXTERN void ompt_libomp_connect(ompt_start_tool_result_t *result) { + OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Enter libomp_ompt_connect\n"); + + __ompt_force_initialization(); + + if (ompt_enabled.enabled && + ompt_callbacks.ompt_callback(ompt_callback_device_initialize)) { + if (result) { + OMPT_VERBOSE_INIT_PRINT( + "libomp --> OMPT: Connecting with libomptarget\n"); + result->initialize(ompt_libomp_target_fn_lookup, + 0 /* initial_device_num */, nullptr /* tool_data */); + libomptarget_ompt_result = result; + } + } + OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Exit libomp_ompt_connect\n"); } diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -13,6 +13,8 @@ #ifndef __OMPT_INTERNAL_H__ #define __OMPT_INTERNAL_H__ +#include "kmp_platform.h" + #include "ompt-event-specific.h" #include "omp-tools.h" @@ -24,6 +26,16 @@ ((x == fork_context_gnu) ? ompt_parallel_invoker_program \ : ompt_parallel_invoker_runtime) +#define OMPT_FRAME_SET(frame, which, ptr_value, flags) \ + { \ + frame->which##_frame.ptr = ptr_value; \ + frame->which##_frame_flags = flags; \ + } + +#define OMPT_FRAME_CLEAR(frame, which) OMPT_FRAME_SET(frame, which, 0, 0) + +#define OMPT_FRAME_SET_P(frame, which) (frame->which##_frame.ptr != NULL) + #define ompt_callback(e) e##_callback typedef struct ompt_callbacks_internal_s { @@ -76,6 +88,7 @@ ompt_data_t thread_data; ompt_data_t task_data; /* stored here from implicit barrier-begin until implicit-task-end */ + ompt_data_t target_task_data; void *return_address; /* stored here on entry of runtime */ ompt_state_t state; ompt_wait_id_t wait_id; diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -20,7 +20,12 @@ * forward declarations ****************************************************************************/ +void __ompt_force_initialization(); + +int __ompt_set_frame_enter_internal(void *addr, int flags, int state); + void __ompt_team_assign_id(kmp_team_t *team, ompt_data_t ompt_pid); + void __ompt_thread_assign_wait_id(void *variable); void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, @@ -33,6 +38,10 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); +ompt_data_t *__ompt_get_task_data(); + +ompt_data_t *__ompt_get_target_task_data(); + ompt_task_info_t *__ompt_get_task_info_object(int depth); int __ompt_get_parallel_info_internal(int ancestor_level, @@ -57,12 +66,12 @@ * macros ****************************************************************************/ -#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info)) +#define OMPT_CUR_TASK_INFO(thr) (&((thr)->th.th_current_task->ompt_task_info)) #define OMPT_CUR_TASK_DATA(thr) \ - (&(thr->th.th_current_task->ompt_task_info.task_data)) -#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info)) + (&((thr)->th.th_current_task->ompt_task_info.task_data)) +#define OMPT_CUR_TEAM_INFO(thr) (&((thr)->th.th_team->t.ompt_team_info)) #define OMPT_CUR_TEAM_DATA(thr) \ - (&(thr->th.th_team->t.ompt_team_info.parallel_data)) + (&((thr)->th.th_team->t.ompt_team_info.parallel_data)) #define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI @@ -79,7 +88,8 @@ if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \ !__kmp_threads[gtid]->th.ompt_thread_info.return_address) \ __kmp_threads[gtid]->th.ompt_thread_info.return_address = \ - __builtin_return_address(0)*/ + __builtin_return_address(0)*/ + #define OMPT_STORE_RETURN_ADDRESS(gtid) \ OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)}; #define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid) diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp --- a/openmp/runtime/src/ompt-specific.cpp +++ b/openmp/runtime/src/ompt-specific.cpp @@ -188,6 +188,11 @@ //****************************************************************************** // interface operations //****************************************************************************** +//---------------------------------------------------------- +// initialization support +//---------------------------------------------------------- + +void __ompt_force_initialization() { __kmp_serial_initialize(); } //---------------------------------------------------------- // thread support @@ -260,7 +265,9 @@ lwt->ompt_team_info.master_return_address = codeptr; lwt->ompt_task_info.task_data.value = 0; lwt->ompt_task_info.frame.enter_frame = ompt_data_none; + lwt->ompt_task_info.frame.enter_frame_flags = 0; lwt->ompt_task_info.frame.exit_frame = ompt_data_none; + lwt->ompt_task_info.frame.exit_frame_flags = 0; lwt->ompt_task_info.scheduling_parent = NULL; lwt->heap = 0; lwt->parent = 0; @@ -339,6 +346,16 @@ // task support //---------------------------------------------------------- +ompt_data_t *__ompt_get_task_data() { + kmp_info_t *thr = ompt_get_thread(); + ompt_data_t *task_data = thr ? OMPT_CUR_TASK_DATA(thr) : NULL; + return task_data; +} + +ompt_data_t *__ompt_get_target_task_data() { + return &__kmp_threads[__kmp_get_gtid()]->th.ompt_thread_info.target_task_data; +} + int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, ompt_frame_t **task_frame, @@ -479,6 +496,21 @@ return 1; } +//---------------------------------------------------------- +// target region support +//---------------------------------------------------------- + +int __ompt_set_frame_enter_internal(void *addr, int flags, int state) { + int gtid = __kmp_entry_gtid(); + kmp_info_t *thr = __kmp_threads[gtid]; + + ompt_frame_t *ompt_frame = &OMPT_CUR_TASK_INFO(thr)->frame; + OMPT_FRAME_SET(ompt_frame, enter, addr, flags); + int old_state = thr->th.ompt_thread_info.state; + thr->th.ompt_thread_info.state = ompt_state_work_parallel; + return old_state; +} + //---------------------------------------------------------- // team support //----------------------------------------------------------