diff --git a/openmp/libomptarget/include/OmptCallback.h b/openmp/libomptarget/include/OmptCallback.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/OmptCallback.h @@ -0,0 +1,92 @@ +//===---- OmptCallback.h - Target independent OMPT callbacks --*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Interface used by target-independent runtimes to coordinate registration and +// invocation of OMPT callbacks and initialization / finalization. +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTCALLBACK_H +#define _OMPTCALLBACK_H + +#ifdef OMPT_SUPPORT + +#include "omp-tools.h" + +#define DEBUG_PREFIX "OMPT" + +#define FOREACH_OMPT_TARGET_CALLBACK(macro) \ + FOREACH_OMPT_DEVICE_EVENT(macro) \ + FOREACH_OMPT_NOEMI_EVENT(macro) \ + FOREACH_OMPT_EMI_EVENT(macro) + +#define ompt_callback(name, ...) 
\ + do { \ + if (ompt_callback_##name##_fn) { \ + DP("Executing: ompt_callback_%s_fn\n", #name); \ + ompt_callback_##name##_fn(__VA_ARGS__); \ + } else \ + DP("NOT Executing: ompt_callback_%s_fn\n", #name); \ + } while (0) + +/// Function type def used for maintaining unique target region, target +/// operations ids +typedef uint64_t (*IdInterfaceTy)(); + +namespace llvm { +namespace omp { +namespace target { +namespace ompt { + +#define OmptDeclareCallback(Name, Type, Code) extern Name##_t Name##_fn; +FOREACH_OMPT_NOEMI_EVENT(OmptDeclareCallback) +FOREACH_OMPT_EMI_EVENT(OmptDeclareCallback) +#undef OmptDeclareCallback + +/// This function will call an OpenMP API function. Which in turn will lookup a +/// given enum value of type \p ompt_callbacks_t and copy the address of the +/// corresponding callback function into the provided pointer. +/// The pointer to the runtime function is passed during 'initializeLibrary'. +/// \p which the enum value of the requested callback function +/// \p callback the destination pointer where the address shall be copied +extern ompt_get_callback_t lookupCallbackByCode; + +/// Lookup function to be used by the lower layer (e.g. the plugin). This +/// function has to be provided when actually calling callback functions like +/// 'ompt_callback_device_initialize_fn' (param: 'lookup'). +/// The pointer to the runtime function is passed during 'initializeLibrary'. +/// \p InterfaceFunctionName the name of the OMPT callback function to look up +extern ompt_function_lookup_t lookupCallbackByName; + +/// This is the function called by the higher layer (libomp / libomptarget) +/// responsible for initializing OMPT in this library. This is passed to libomp +/// as part of the OMPT connector object. 
+/// \p lookup to be used to query callbacks registered with libomp +/// \p initial_device_num initial device num (id) provided by libomp +/// \p tool_data as provided by the tool +int initializeLibrary(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data); + +/// This function is passed to libomp / libomptarget as part of the OMPT +/// connector object. It is called by libomp during finalization of OMPT in +/// libomptarget -OR- by libomptarget during finalization of OMPT in the plugin. +/// \p tool_data as provided by the tool +void finalizeLibrary(ompt_data_t *tool_data); + +/// This function will connect the \p initializeLibrary and \p finalizeLibrary +/// functions to their respective higher layer. +void connectLibrary(); + +} // namespace ompt +} // namespace target +} // namespace omp +} // namespace llvm + +#endif // OMPT_SUPPORT + +#endif // _OMPTCALLBACK_H diff --git a/openmp/libomptarget/include/ompt_connector.h b/openmp/libomptarget/include/ompt_connector.h --- a/openmp/libomptarget/include/ompt_connector.h +++ b/openmp/libomptarget/include/ompt_connector.h @@ -101,7 +101,7 @@ std::string LibIdent; }; -#undef DEBUG_PREFIX +// #undef DEBUG_PREFIX #endif // OMPT_SUPPORT diff --git a/openmp/libomptarget/include/ompt_device_callbacks.h b/openmp/libomptarget/include/ompt_device_callbacks.h --- a/openmp/libomptarget/include/ompt_device_callbacks.h +++ b/openmp/libomptarget/include/ompt_device_callbacks.h @@ -1,4 +1,4 @@ -//===--------- ompt_device_callbacks.h - OMPT callbacks -- C++ ----------===// +//===---------- ompt_device_callbacks.h - OMPT callbacks -- C++ -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -17,7 +17,9 @@ #ifdef OMPT_SUPPORT #include "Debug.h" +#include #include +#include #define DEBUG_PREFIX "OMPT" @@ -26,27 +28,244 @@ FOREACH_OMPT_NOEMI_EVENT(macro) \ FOREACH_OMPT_EMI_EVENT(macro) +/// Function type def used for maintaining unique target region, target +/// operations ids +typedef uint64_t (*IdInterfaceTy)(); + +/// Internal representation for OMPT device +class OmptDeviceTy { +public: + OmptDeviceTy() { Enabled.store(false); } + bool initialize() { + bool Old = false; + return Enabled.compare_exchange_strong(Old, true); + } + bool finalize() { + bool Old = true; + return Enabled.compare_exchange_strong(Old, false); + } + +private: + std::atomic<bool> Enabled; +}; + /// Internal representation for OMPT device callback functions. class OmptDeviceCallbacksTy { public: + /// Invoked when a device is initialized + void OmptCallbackDeviceInitialize(int DeviceNum, const char *Type) { + if (ompt_callback_device_initialize_fn) { + OmptDeviceTy *Device = lookupDevice(DeviceNum); + if (Device && Device->initialize()) { + // Invoke the tool supplied device init callback + ompt_callback_device_initialize_fn( + DeviceNum, Type, reinterpret_cast<ompt_device_t *>(Device), + doLookup, Documentation); + } + } + } + + /// Invoked when a device is finalized + void OmptCallbackDeviceFinalize(int DeviceNum) { + if (ompt_callback_device_finalize_fn) { + OmptDeviceTy *Device = lookupDevice(DeviceNum); + if (Device && Device->finalize()) { + // Invoke the tool supplied device finalize callback + ompt_callback_device_finalize_fn(DeviceNum); + } + } + } + + /// Invoked when a device image is loaded + void OmptCallbackDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, void *VmaInFile, + size_t Bytes, void *HostAddr, void *DeviceAddr, + uint64_t ModuleId) { + if (ompt_callback_device_load_fn) { + // Invoke the tool supplied device load callback + ompt_callback_device_load_fn(DeviceNum, Filename, OffsetInFile, VmaInFile, + Bytes, HostAddr, DeviceAddr, ModuleId); + } + } + + 
/// Invoked when a device image is unloaded + void OmptCallbackDeviceUnload(int DeviceNum, uint64_t ModuleId) { + if (ompt_callback_device_unload_fn) { + // Invoke the tool supplied device unload callback + ompt_callback_device_unload_fn(DeviceNum, ModuleId); + } + } + + /// Invoked when a data transfer is initiated regardless of whether the + /// external monitoring interface (EMI) callback is registered + void OmptCallbackTargetDataOpEmi(ompt_scope_endpoint_t EndPoint, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DestAddr, + int DestDeviceNum, size_t Bytes, + const void *CodePtrRA, + IdInterfaceTy IdInterface) { + // If the tool registered the EMI callback, invoke that. Note that the EMI + // function is invoked if both emi and non-emi are registered. + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn( + EndPoint, TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, + SrcDeviceNum, DestAddr, DestDeviceNum, Bytes, CodePtrRA); + } else if (EndPoint == ompt_scope_begin) { + // For non-EMI callback, proceed for begin scope only + OmptCallbackTargetDataOp(TargetData->value, HostOpId, OpType, SrcAddr, + SrcDeviceNum, DestAddr, DestDeviceNum, Bytes, + CodePtrRA, IdInterface); + } + } + + /// Invoked when a data transfer is initiated and the non-EMI callback is + /// registered + void OmptCallbackTargetDataOp(ompt_id_t TargetId, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DestAddr, + int DestDeviceNum, size_t Bytes, + const void *CodePtrRA, + IdInterfaceTy IdInterface) { + if (ompt_callback_target_data_op_fn) { + // HostOpId is set by the runtime + *HostOpId = IdInterface(); + // Invoke the tool supplied data op callback + ompt_callback_target_data_op_fn(TargetId, *HostOpId, OpType, SrcAddr, + 
SrcDeviceNum, DestAddr, DestDeviceNum, + Bytes, CodePtrRA); + } + } + + /// Invoked when a target region is executed regardless of whether the + /// external monitoring interface (EMI) callback is registered + void OmptCallbackTargetEmi(ompt_target_t Kind, ompt_scope_endpoint_t EndPoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, const void *CodePtrRA, + IdInterfaceTy IdInterface) { + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(Kind, EndPoint, DeviceNum, TaskData, + TargetTaskData, TargetData, CodePtrRA); + } else { + OmptCallbackTarget(Kind, EndPoint, DeviceNum, TaskData, CodePtrRA, + TargetData, IdInterface); + } + } + + /// Invoked when a target region is executed and the EMI callback is not + /// registered + void OmptCallbackTarget(ompt_target_t Kind, ompt_scope_endpoint_t EndPoint, + int DeviceNum, ompt_data_t *TaskData, + const void *CodePtrRA, ompt_data_t *TargetData, + IdInterfaceTy IdInterface) { + // If we reach this point, ompt_callback_target_emi was not + // invoked so a tool didn't provide a target id. Thus, we must + // unconditionally get an id here, even if there is no + // OMPT callback target registered. We need to have an id for use by other + // callbacks. + // Note: On a scope_begin callback, IdInterface() will generate an id. + // On a scope_end callback, IdInterface() will return the existing + // id. It is safe to do the assignment again to TargetData->value. 
+ TargetData->value = IdInterface(); + if (ompt_callback_target_fn) { + // Invoke the tool supplied target callback + ompt_callback_target_fn(Kind, EndPoint, DeviceNum, TaskData, + TargetData->value, CodePtrRA); + } + } + + /// Invoked when a target map clause is executed regardless of whether the + /// external monitoring interface (EMI) callback is registered + void OmptCallbackTargetMapEmi(ompt_data_t *TargetData, unsigned int NItems, + void **HostAddr, void **DeviceAddr, + size_t *Bytes, unsigned int *MappingFlags, + const void *CodePtrRA) { + if (ompt_callback_target_map_emi_fn) { + // Invoke the tool supplied map EMI callback + ompt_callback_target_map_emi_fn(TargetData, NItems, HostAddr, DeviceAddr, + Bytes, MappingFlags, CodePtrRA); + } else { + OmptCallbackTargetMap(TargetData->value, NItems, HostAddr, DeviceAddr, + Bytes, MappingFlags, CodePtrRA); + } + } + + /// Invoked when a target map clause is executed and the EMI callback is not + /// registered + void OmptCallbackTargetMap(ompt_id_t TargetId, unsigned int NItems, + void **HostAddr, void **DeviceAddr, size_t *Bytes, + unsigned int *MappingFlags, + const void *CodePtrRA) { + if (ompt_callback_target_map_fn) { + // Invoke the tool supplied map callback + ompt_callback_target_map_fn(TargetId, NItems, HostAddr, DeviceAddr, Bytes, + MappingFlags, CodePtrRA); + } + } + + /// Invoked when a target submit is executed regardless of whether the + /// external monitoring interface (EMI) callback is registered + void OmptCallbackTargetSubmitEmi(ompt_scope_endpoint_t EndPoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams, + IdInterfaceTy IdInterface) { + if (ompt_callback_target_submit_emi_fn) { + // HostOpId is set by the tool. 
Invoke the tool supplied target submit EMI + // callback + ompt_callback_target_submit_emi_fn(EndPoint, TargetData, HostOpId, + RequestedNumTeams); + } else if (EndPoint == ompt_scope_begin) { + // For non-EMI callback, proceed for begin scope only + OmptCallbackTargetSubmit(TargetData->value, HostOpId, + RequestedNumTeams, IdInterface); + } + } + + /// Invoked when a target submit is executed and the EMI callback is not + /// registered + void OmptCallbackTargetSubmit(ompt_id_t TargetId, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams, + IdInterfaceTy IdInterface) { + if (ompt_callback_target_submit_fn) { + // HostOpId is set by the runtime. + *HostOpId = IdInterface(); + // Invoke the tool supplied target submit callback + ompt_callback_target_submit_fn(TargetId, *HostOpId, RequestedNumTeams); + } + } + /// Initialize the enabled flag and all the callbacks + /* void init() { Enabled = false; #define initName(Name, Type, Code) Name##_fn = 0; FOREACH_OMPT_TARGET_CALLBACK(initName) #undef initName } + */ + + /* + bool isEnabled() { return Enabled; } + */ /// Used to register callbacks. \p Lookup is used to query a given callback /// by name and the result is assigned to the corresponding callback function. 
void registerCallbacks(ompt_function_lookup_t Lookup) { - Enabled = true; + // Enabled = true; #define OmptBindCallback(Name, Type, Code) \ Name##_fn = (Name##_t)Lookup(#Name); \ - DP("OMPT: class bound %s=%p\n", #Name, ((void *)(uint64_t)Name##_fn)); + DP("OMPT: class bound %s=%p\n", #Name, (void *)(Name##_fn)); FOREACH_OMPT_TARGET_CALLBACK(OmptBindCallback); #undef OmptBindCallback + DP("Executed 'registerCallbacks' on OmptDeviceCallbacksTy -- obj=%p " + "-- Lookup=%p\n", + (void *)this, (void *)(Lookup)); } /// Used to find a callback given its name @@ -65,18 +284,23 @@ private: /// Set to true if callbacks for this library have been initialized - bool Enabled; + // bool Enabled = false; + /// Number of allocated devices + // static int NumAllocatedDevices; /// Callback functions #define DeclareName(Name, Type, Code) Name##_t Name##_fn; FOREACH_OMPT_TARGET_CALLBACK(DeclareName) #undef DeclareName + + /// Documentation based on omp-tools + // static const char *Documentation; }; /// Device callbacks object for the library that performs the instantiation extern OmptDeviceCallbacksTy OmptDeviceCallbacks; -#undef DEBUG_PREFIX +// #undef DEBUG_PREFIX #endif // OMPT_SUPPORT diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -23,6 +23,7 @@ #include "Debug.h" #include "DeviceEnvironment.h" #include "GlobalHandler.h" +#include "OmptCallback.h" #include "PluginInterface.h" #include "Utilities.h" #include "UtilitiesRTL.h" @@ -2359,6 +2360,10 @@ // HSA functions from now on, e.g., hsa_shut_down. Initialized = true; +#ifdef OMPT_SUPPORT + ompt::connectLibrary(); +#endif + // Register event handler to detect memory errors on the devices. 
Status = hsa_amd_register_system_event_handler(eventHandler, nullptr); if (auto Err = Plugin::check( diff --git a/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp b/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp --- a/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp @@ -11,73 +11,59 @@ //===----------------------------------------------------------------------===// #ifdef OMPT_SUPPORT -#include -#include -#include -#include + +#include "llvm/Support/DynamicLibrary.h" + +#include +#include +#include #include "Debug.h" +#include "OmptCallback.h" #include "ompt_connector.h" -#include "ompt_device_callbacks.h" -/// Object maintaining all the callbacks in the plugin -OmptDeviceCallbacksTy OmptDeviceCallbacks; +using namespace llvm::omp::target::ompt; -/// Lookup function used for querying callback functions maintained -/// by the plugin -ompt_interface_fn_t -OmptDeviceCallbacksTy::doLookup(const char *InterfaceFunctionName) { - // TODO This will be populated with device tracing functions - return (ompt_interface_fn_t) nullptr; -} +ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr; +ompt_function_lookup_t llvm::omp::target::ompt::lookupCallbackByName = nullptr; + +int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup, + int initial_device_num, + ompt_data_t *tool_data) { + DP("OMPT: Executing initializeLibrary (libomptarget)\n"); +#define BindOmptFunctionName(srcFn, dstFn) \ + dstFn = (srcFn##_t)lookup(#srcFn); \ + DP("OMPT: initializeLibrary (libomptarget) bound %s=%p\n", #dstFn, \ + (void *)(dstFn)); -/// Used to indicate whether OMPT was enabled for this library -static bool OmptEnabled = false; + BindOmptFunctionName(ompt_get_callback, lookupCallbackByCode); +#undef BindOmptFunctionName + + // Store pointer of 'ompt_libomp_target_fn_lookup' for use by the plugin + lookupCallbackByName = lookup; -/// This 
function is passed to libomptarget as part of the OMPT connector -/// object. It is called by libomptarget during initialization of OMPT in the -/// plugin. \p lookup to be used to query callbacks registered with libomptarget -/// \p initial_device_num Initial device num provided by libomptarget -/// \p tool_data as provided by the tool -static int OmptDeviceInit(ompt_function_lookup_t lookup, int initial_device_num, - ompt_data_t *tool_data) { - DP("OMPT: Enter OmptDeviceInit\n"); - OmptEnabled = true; - // The lookup parameter is provided by libomptarget which already has the tool - // callbacks registered at this point. The registration call below causes the - // same callback functions to be registered in the plugin as well. - OmptDeviceCallbacks.registerCallbacks(lookup); - DP("OMPT: Exit OmptDeviceInit\n"); return 0; } -/// This function is passed to libomptarget as part of the OMPT connector -/// object. It is called by libomptarget during finalization of OMPT in the -/// plugin. -static void OmptDeviceFini(ompt_data_t *tool_data) { - DP("OMPT: Executing OmptDeviceFini\n"); +void llvm::omp::target::ompt::finalizeLibrary(ompt_data_t *tool_data) { + DP("OMPT: Executing finalizeLibrary (libomptarget)\n"); } -/// Used to initialize callbacks implemented by the tool. This interface will -/// lookup the callbacks table in libomptarget and assign them to the callbacks -/// table maintained in the calling plugin library. 
-void OmptCallbackInit() { - DP("OMPT: Entering OmptCallbackInit\n"); - /// Connect plugin instance with libomptarget +void llvm::omp::target::ompt::connectLibrary() { + DP("OMPT: Entering connectLibrary (libomptarget)\n"); + // Connect with libomptarget static OmptLibraryConnectorTy LibomptargetConnector("libomptarget"); static ompt_start_tool_result_t OmptResult; // Initialize OmptResult with the init and fini functions that will be // called by the connector - OmptResult.initialize = OmptDeviceInit; - OmptResult.finalize = OmptDeviceFini; + OmptResult.initialize = ompt::initializeLibrary; + OmptResult.finalize = ompt::finalizeLibrary; OmptResult.tool_data.value = 0; - // Initialize the device callbacks first - OmptDeviceCallbacks.init(); - // Now call connect that causes the above init/fini functions to be called LibomptargetConnector.connect(&OmptResult); - DP("OMPT: Exiting OmptCallbackInit\n"); + DP("OMPT: Exiting connectLibrary (libomptarget)\n"); } + #endif diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -24,6 +24,7 @@ #include "JIT.h" #include "MemoryManager.h" #include "Utilities.h" +#include "omp-tools.h" #include "omptarget.h" #include "llvm/ADT/SmallVector.h" @@ -512,7 +513,7 @@ /// Deinitialize the device and free all its resources. After this call, the /// device is no longer considered ready, so no queries or modifications are /// allowed. - Error deinit(); + Error deinit(GenericPluginTy &Plugin); virtual Error deinitImpl() = 0; /// Load the binary image into the device and return the target table. @@ -748,6 +749,16 @@ /// Map of host pinned allocations used for optimize device transfers. 
PinnedAllocationMapTy PinnedAllocs; + +#ifdef OMPT_SUPPORT + /// Callback functions +#define OmptDeclareCallback(Name, Type, Code) Name##_t Name##_fn = nullptr; + FOREACH_OMPT_DEVICE_EVENT(OmptDeclareCallback) +#undef OmptDeclareCallback + + /// Internal representation for OMPT device (initialize & finalize) + std::atomic<bool> OmptDevice; +#endif }; /// Class implementing common functionalities of offload plugins. Each plugin diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -12,7 +12,9 @@ #include "Debug.h" #include "GlobalHandler.h" #include "JIT.h" +#include "OmptCallback.h" #include "elf_common.h" +#include "omp-tools.h" #include "omptarget.h" #include "omptargetplugin.h" @@ -360,12 +362,40 @@ OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 32), DeviceId(DeviceId), GridValues(OMPGridValues), PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(), - PinnedAllocs(*this) {} + PinnedAllocs(*this) { +#ifdef OMPT_SUPPORT + OmptDevice.store(false); + + // Bind the callbacks to this device's member functions. lookupCallbackByCode + // stays null until initializeLibrary() has run, so guard every call. 
+#define BindOmptCallback(Name, Type, Code) \ + if (ompt::lookupCallbackByCode) { \ + ompt::lookupCallbackByCode((ompt_callbacks_t)(Code), \ + ((ompt_callback_t *)&(Name##_fn))); \ + DP("OMPT (GenericDeviceTy): class bound %s=%p\n", #Name, \ + ((void *)(uint64_t)Name##_fn)); \ + } + + FOREACH_OMPT_DEVICE_EVENT(BindOmptCallback); +#undef BindOmptCallback +#endif +} Error GenericDeviceTy::init(GenericPluginTy &Plugin) { if (auto Err = initImpl(Plugin)) return Err; +#ifdef OMPT_SUPPORT + bool expectedStatus = false; + if (OmptDevice.compare_exchange_strong(expectedStatus, true)) + 
ompt_callback(device_initialize, + /* device_num */ DeviceId, + /* type */ getComputeUnitKind().c_str(), + /* device */ reinterpret_cast<ompt_device_t *>(this), + /* lookup */ ompt::lookupCallbackByName, + /* documentation */ nullptr); +#endif + // Read and reinitialize the envars that depend on the device initialization. // Notice these two envars may change the stack size and heap size of the // device, so they need the device properly initialized. @@ -407,8 +437,8 @@ return Plugin::success(); } -Error GenericDeviceTy::deinit() { - // Delete the memory manager before deinitilizing the device. Otherwise, +Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { + // Delete the memory manager before deinitializing the device. Otherwise, // we may delete device allocations after the device is deinitialized. if (MemoryManager) delete MemoryManager; @@ -417,9 +447,14 @@ if (RecordReplay.isRecordingOrReplaying()) RecordReplay.deinit(); +#ifdef OMPT_SUPPORT + bool expectedStatus = true; + if (OmptDevice.compare_exchange_strong(expectedStatus, false)) + ompt_callback(device_finalize, /* device_num */ DeviceId); +#endif + return deinitImpl(); } - Expected<__tgt_target_table *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, const __tgt_device_image *InputTgtImage) { @@ -457,6 +492,19 @@ if (auto Err = registerOffloadEntries(*Image)) return std::move(Err); +#ifdef OMPT_SUPPORT + size_t Bytes = getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart); + ompt_callback(device_load, + /* device_num */ DeviceId, + /* FileName */ nullptr, + /* File Offset */ 0, + /* VmaInFile */ nullptr, + /* ImgSize */ Bytes, + /* HostAddr */ InputTgtImage->ImageStart, + /* DeviceAddr */ nullptr, + /* FIXME: ModuleId */ 0); +#endif + // Return the pointer to the table of entries. 
return Image->getOffloadEntryTable(); } @@ -1084,7 +1132,7 @@ return Plugin::success(); // Deinitialize the device and release its resources. - if (auto Err = Devices[DeviceId]->deinit()) + if (auto Err = Devices[DeviceId]->deinit(*this)) return Err; // Delete the device and invalidate its reference. diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -19,6 +19,7 @@ #include "Debug.h" #include "DeviceEnvironment.h" #include "GlobalHandler.h" +#include "OmptCallback.h" #include "PluginInterface.h" #include "llvm/BinaryFormat/ELF.h" @@ -899,6 +900,10 @@ return 0; } +#ifdef OMPT_SUPPORT + ompt::connectLibrary(); +#endif + if (Res == CUDA_ERROR_NO_DEVICE) { // Do not initialize if there are no devices. 
DP("There are no devices supporting CUDA.\n"); diff --git a/openmp/libomptarget/src/OmptCallback.cpp b/openmp/libomptarget/src/OmptCallback.cpp --- a/openmp/libomptarget/src/OmptCallback.cpp +++ b/openmp/libomptarget/src/OmptCallback.cpp @@ -12,34 +12,376 @@ #ifdef OMPT_SUPPORT -#include -#include +#include "llvm/Support/DynamicLibrary.h" + #include #include - -#include "omp-tools.h" +#include #include "Debug.h" +#include "OmptCallback.h" +#include "OmptInterface.h" #include "ompt_connector.h" -#include "ompt_device_callbacks.h" -#include "private.h" -#define fnptr_to_ptr(x) ((void *)(uint64_t)x) +using namespace llvm::omp::target::ompt; + +#define OmptDefineCallback(Name, Type, Code) \ + Name##_t llvm::omp::target::ompt::Name##_fn = nullptr; +FOREACH_OMPT_NOEMI_EVENT(OmptDefineCallback) +FOREACH_OMPT_EMI_EVENT(OmptDefineCallback) +#undef OmptDefineCallback + +/// Thread local state for target region and associated metadata +thread_local Interface llvm::omp::target::ompt::RegionInterface; + +/// Define function pointers +ompt_get_task_data_t ompt_get_task_data_fn = nullptr; +ompt_get_target_task_data_t ompt_get_target_task_data_fn = nullptr; + +/// Unique correlation id +static std::atomic<uint64_t> IdCounter(1); + +/// Used to create a new correlation id +static uint64_t createId() { return IdCounter.fetch_add(1); } + +/// Create a new correlation id and update the operations id +static uint64_t createOpId() { + uint64_t new_id = createId(); + llvm::omp::target::ompt::RegionInterface.setHostOpId(new_id); + return new_id; +} + +/// Create a new correlation id and update the target region id +static uint64_t createRegionId() { + uint64_t new_id = createId(); + llvm::omp::target::ompt::RegionInterface.setTargetDataValue(new_id); + return new_id; +} + +/// Get the current target region id +static uint64_t getRegionId() { + return llvm::omp::target::ompt::RegionInterface.getTargetDataValue(); +} -/// Used to indicate whether OMPT was enabled for this library -bool OmptEnabled = false; 
-/// Object maintaining all the callbacks for this library -OmptDeviceCallbacksTy OmptDeviceCallbacks; +void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, + size_t Size, void *Code) { + beginTargetDataOperation(); + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn( + ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId, + ompt_target_data_alloc, HstPtrBegin, DeviceId, /* TgtPtrBegin */ nullptr, + /* TgtDeviceNum */ 0, Size, Code); + } else if (ompt_callback_target_data_op_fn) { + // HostOpId is set by the runtime + HostOpId = createOpId(); + // Invoke the tool supplied data op callback + ompt_callback_target_data_op_fn( + TargetData.value, HostOpId, ompt_target_data_alloc, HstPtrBegin, + DeviceId, /* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ 0, Size, Code); + } +} + +void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, + size_t Size, void *Code) { + // Only EMI callback handles end scope + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn( + ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId, + ompt_target_data_alloc, HstPtrBegin, DeviceId, /* TgtPtrBegin */ nullptr, + /* TgtDeviceNum */ 0, Size, Code); + } + endTargetDataOperation(); +} + +void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin, + void *HstPtrBegin, size_t Size, + void *Code) { + beginTargetDataOperation(); + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. 
Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn( + ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId, + ompt_target_data_transfer_to_device, HstPtrBegin, + /* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code); + } else if (ompt_callback_target_data_op_fn) { + // HostOpId is set by the runtime + HostOpId = createOpId(); + // Invoke the tool supplied data op callback + ompt_callback_target_data_op_fn( + TargetData.value, HostOpId, ompt_target_data_transfer_to_device, + HstPtrBegin, + /* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code); + } +} + +void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin, + void *HstPtrBegin, size_t Size, + void *Code) { + // Only EMI callback handles end scope + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn( + ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId, + ompt_target_data_transfer_to_device, HstPtrBegin, + /* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code); + } + endTargetDataOperation(); +} + +void Interface::beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, + void *Code) { + beginTargetDataOperation(); + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. 
Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn( + ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId, + ompt_target_data_delete, TgtPtrBegin, DeviceId, /* TgtPtrBegin */ nullptr, + /* TgtDeviceNum */ 0, /* Bytes */ 0, Code); + } else if (ompt_callback_target_data_op_fn) { + // HostOpId is set by the runtime + HostOpId = createOpId(); + // Invoke the tool supplied data op callback + ompt_callback_target_data_op_fn(TargetData.value, HostOpId, + ompt_target_data_delete, TgtPtrBegin, + DeviceId, /* TgtPtrBegin */ nullptr, + /* TgtDeviceNum */ 0, /* Bytes */ 0, Code); + } +} + +void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, + void *Code) { + // Only EMI callback handles end scope + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn( + ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId, + ompt_target_data_delete, TgtPtrBegin, DeviceId, /* TgtPtrBegin */ nullptr, + /* TgtDeviceNum */ 0, /* Bytes */ 0, Code); + } + endTargetDataOperation(); +} + +void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, + void *Code) { + beginTargetDataOperation(); + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. 
Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn(ompt_scope_begin, TargetTaskData, + &TargetData, &TargetRegionOpId, + ompt_target_data_transfer_from_device, + TgtPtrBegin, DeviceId, HstPtrBegin, + /* TgtDeviceNum */ 0, Size, Code); + } else if (ompt_callback_target_data_op_fn) { + // HostOpId is set by the runtime + HostOpId = createOpId(); + // Invoke the tool supplied data op callback + ompt_callback_target_data_op_fn(TargetData.value, HostOpId, + ompt_target_data_transfer_from_device, + TgtPtrBegin, DeviceId, HstPtrBegin, + /* TgtDeviceNum */ 0, Size, Code); + } +} + +void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, + void *Code) { + // Only EMI callback handles end scope + if (ompt_callback_target_data_op_emi_fn) { + // HostOpId will be set by the tool. Invoke the tool supplied data op EMI + // callback + ompt_callback_target_data_op_emi_fn(ompt_scope_end, TargetTaskData, + &TargetData, &TargetRegionOpId, + ompt_target_data_transfer_from_device, + TgtPtrBegin, DeviceId, HstPtrBegin, + /* TgtDeviceNum */ 0, Size, Code); + } + endTargetDataOperation(); +} + +void Interface::beginTargetSubmit(uint32_t numTeams) { + if (ompt_callback_target_submit_emi_fn) { + // HostOpId is set by the tool. Invoke the tool supplied target submit EMI + // callback + ompt_callback_target_submit_emi_fn(ompt_scope_begin, &TargetData, &HostOpId, + numTeams); + } else if (ompt_callback_target_submit_fn) { + // HostOpId is set by the runtime + HostOpId = createOpId(); + ompt_callback_target_submit_fn(TargetData.value, HostOpId, numTeams); + } +} + +void Interface::endTargetSubmit(uint32_t numTeams) { + // Only EMI callback handles end scope + if (ompt_callback_target_submit_emi_fn) { + // HostOpId is set by the tool. 
Invoke the tool supplied target submit EMI + // callback + ompt_callback_target_submit_emi_fn(ompt_scope_end, &TargetData, &HostOpId, + numTeams); + } +} + +void Interface::beginTargetDataEnter(int64_t DeviceId, void *Code) { + beginTargetRegion(); + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(ompt_target_enter_data, ompt_scope_begin, + DeviceId, TaskData, TargetTaskData, &TargetData, + Code); + } else if (ompt_callback_target_fn) { + TargetData.value = createRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target_enter_data, ompt_scope_begin, DeviceId, + TaskData, TargetData.value, Code); + } +} + +void Interface::endTargetDataEnter(int64_t DeviceId, void *Code) { + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(ompt_target_enter_data, ompt_scope_end, + DeviceId, TaskData, TargetTaskData, &TargetData, + Code); + } else if (ompt_callback_target_fn) { + TargetData.value = getRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target_enter_data, ompt_scope_end, DeviceId, + TaskData, TargetData.value, Code); + } + endTargetRegion(); +} + +void Interface::beginTargetDataExit(int64_t DeviceId, void *Code) { + beginTargetRegion(); + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(ompt_target_exit_data, ompt_scope_begin, + DeviceId, TaskData, TargetTaskData, &TargetData, + Code); + } else if (ompt_callback_target_fn) { + TargetData.value = createRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target_exit_data, ompt_scope_begin, DeviceId, + TaskData, TargetData.value, Code); + } +} + +void Interface::endTargetDataExit(int64_t DeviceId, void *Code) { + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + 
ompt_callback_target_emi_fn(ompt_target_exit_data, ompt_scope_end, DeviceId, + TaskData, TargetTaskData, &TargetData, Code); + } else if (ompt_callback_target_fn) { + TargetData.value = getRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target_exit_data, ompt_scope_end, DeviceId, + TaskData, (&TargetData)->value, Code); + } + endTargetRegion(); +} + +void Interface::beginTargetUpdate(int64_t DeviceId, void *Code) { + beginTargetRegion(); + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(ompt_target_update, ompt_scope_begin, DeviceId, + TaskData, TargetTaskData, &TargetData, Code); + } else if (ompt_callback_target_fn) { + TargetData.value = createRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target_update, ompt_scope_begin, DeviceId, + TaskData, (&TargetData)->value, Code); + } +} + +void Interface::endTargetUpdate(int64_t DeviceId, void *Code) { + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(ompt_target_update, ompt_scope_end, DeviceId, + TaskData, TargetTaskData, &TargetData, Code); + } else if (ompt_callback_target_fn) { + TargetData.value = getRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target_update, ompt_scope_end, DeviceId, + TaskData, (&TargetData)->value, Code); + } + endTargetRegion(); +} + +void Interface::beginTarget(int64_t DeviceId, void *Code) { + beginTargetRegion(); + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(ompt_target, ompt_scope_begin, DeviceId, + TaskData, TargetTaskData, &TargetData, Code); + } else if (ompt_callback_target_fn) { + TargetData.value = createRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target, ompt_scope_begin, DeviceId, TaskData, + 
(&TargetData)->value, Code); + } +} + +void Interface::endTarget(int64_t DeviceId, void *Code) { + if (ompt_callback_target_emi_fn) { + // Invoke the tool supplied target EMI callback + ompt_callback_target_emi_fn(ompt_target, ompt_scope_end, DeviceId, TaskData, + TargetTaskData, &TargetData, Code); + } else if (ompt_callback_target_fn) { + TargetData.value = getRegionId(); + // Invoke the tool supplied target callback + ompt_callback_target_fn(ompt_target, ompt_scope_end, DeviceId, TaskData, + (&TargetData)->value, Code); + } + endTargetRegion(); +} + +void Interface::beginTargetDataOperation() { + DP("in ompt_target_region_begin (TargetRegionOpId = %lu)\n", + TargetData.value); +} + +void Interface::endTargetDataOperation() { + DP("in ompt_target_region_end (TargetRegionOpId = %lu)\n", TargetData.value); +} + +void Interface::beginTargetRegion() { + // Set up task state + assert(ompt_get_task_data_fn && "Calling a null task data function"); + TaskData = ompt_get_task_data_fn(); + // Set up target task state + assert(ompt_get_target_task_data_fn && + "Calling a null target task data function"); + TargetTaskData = ompt_get_target_task_data_fn(); + // Target state will be set later + TargetData = ompt_data_none; +} + +void Interface::endTargetRegion() { + TaskData = 0; + TargetTaskData = 0; + TargetData = ompt_data_none; +} /// Used to maintain the finalization function that is received /// from the plugin during connect -class LibomptargetRtlFinalizer { +class LibomptargetPluginFinalizer { public: - LibomptargetRtlFinalizer() : FiniFn(nullptr) {} + LibomptargetPluginFinalizer() : FiniFn(nullptr) {} void registerRtl(ompt_finalize_t fn) { FiniFn = fn; } void finalize() { if (FiniFn) - FiniFn(nullptr /* tool_data */); + FiniFn(/* tool_data */ nullptr); FiniFn = nullptr; } @@ -48,69 +390,64 @@ }; /// Object that will maintain the finalizer of the plugin -static LibomptargetRtlFinalizer LibOmptTargetRTLFinalizer; - -/// Lookup function to be used by libomptarget library 
-ompt_interface_fn_t -OmptDeviceCallbacksTy::doLookup(const char *InterfaceFunctionName) { - return OmptDeviceCallbacks.lookupCallback(InterfaceFunctionName); -} - -/// This is the function called by the higher layer (libomp) responsible -/// for initializing OMPT in this library. This is passed to libomp -/// as part of the OMPT connector object. -/// \p lookup to be used to query callbacks registered with libomp -/// \p initial_device_num Initial device num provided by libomp -/// \p tool_data as provided by the tool -static int ompt_libomptarget_initialize(ompt_function_lookup_t lookup, - int initial_device_num, - ompt_data_t *tool_data) { - DP("enter ompt_libomptarget_initialize!\n"); - OmptEnabled = true; - // The lookup parameter is provided by libomp which already has the - // tool callbacks registered at this point. The registration call - // below causes the same callback functions to be registered in - // libomptarget as well - OmptDeviceCallbacks.registerCallbacks(lookup); - DP("exit ompt_libomptarget_initialize!\n"); +static LibomptargetPluginFinalizer PluginFinalizer; + +ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = 0; +ompt_function_lookup_t llvm::omp::target::ompt::lookupCallbackByName = 0; + +int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup, + int initial_device_num, + ompt_data_t *tool_data) { + DP("OMPT: Executing initializeLibrary (libomp)\n"); +#define BindOmptFunctionName(srcFn, dstFn) \ + dstFn = (srcFn##_t)lookup(#srcFn); \ + DP("OMPT: initializeLibrary (libomp) bound %s=%p\n", #dstFn, (void *)(dstFn)); + + BindOmptFunctionName(ompt_get_callback, lookupCallbackByCode); + BindOmptFunctionName(ompt_get_task_data, ompt_get_task_data_fn); + BindOmptFunctionName(ompt_get_target_task_data, ompt_get_target_task_data_fn); +#undef BindOmptFunctionName + + // Store pointer of 'ompt_libomp_target_fn_lookup' for use by libomptarget + lookupCallbackByName = lookup; + + assert(lookupCallbackByCode && 
"lookupCallbackByCode should be non-null"); + assert(lookupCallbackByName && "lookupCallbackByName should be non-null"); + return 0; } -/// This function is passed to libomp as part of the OMPT connector object. -/// It is called by libomp during finalization of OMPT in libomptarget. -static void ompt_libomptarget_finalize(ompt_data_t *data) { - DP("enter ompt_libomptarget_finalize!\n"); +void llvm::omp::target::ompt::finalizeLibrary(ompt_data_t *data) { + DP("OMPT: Executing finalizeLibrary (libomp)\n"); // Before disabling OMPT, call the finalizer (of the plugin) that was // registered with this library - LibOmptTargetRTLFinalizer.finalize(); - OmptEnabled = false; - DP("exit ompt_libomptarget_finalize!\n"); -} - -/***************************************************************************** - * constructor - *****************************************************************************/ -/// Used to initialize callbacks implemented by the tool. This interface -/// will lookup the callbacks table in libomp and assign them to the callbacks -/// maintained in libomptarget. 
-void InitOmptLibomp() { - DP("OMPT: Enter InitOmptLibomp\n"); + PluginFinalizer.finalize(); +} + +void llvm::omp::target::ompt::connectLibrary() { + DP("OMPT: Entering connectLibrary (libomp)\n"); // Connect with libomp static OmptLibraryConnectorTy LibompConnector("libomp"); static ompt_start_tool_result_t OmptResult; // Initialize OmptResult with the init and fini functions that will be // called by the connector - OmptResult.initialize = ompt_libomptarget_initialize; - OmptResult.finalize = ompt_libomptarget_finalize; + OmptResult.initialize = ompt::initializeLibrary; + OmptResult.finalize = ompt::finalizeLibrary; OmptResult.tool_data.value = 0; - // Initialize the device callbacks first - OmptDeviceCallbacks.init(); - // Now call connect that causes the above init/fini functions to be called LibompConnector.connect(&OmptResult); - DP("OMPT: Exit InitOmptLibomp\n"); + // lookupCallbackByCode stays null unless initializeLibrary ran +#define OmptBindCallback(Name, Type, Code) \ + if (lookupCallbackByCode) \ + lookupCallbackByCode((ompt_callbacks_t)(Code), \ + (ompt_callback_t *)&(llvm::omp::target::ompt::Name##_fn)); + FOREACH_OMPT_NOEMI_EVENT(OmptBindCallback) + FOREACH_OMPT_EMI_EVENT(OmptBindCallback) +#undef OmptBindCallback + + DP("OMPT: Exiting connectLibrary (libomp)\n"); } #endif // OMPT_SUPPORT @@ -119,14 +456,14 @@ /// Used for connecting libomptarget with a plugin void ompt_libomptarget_connect(ompt_start_tool_result_t *result) { DP("OMPT: Enter ompt_libomptarget_connect\n"); - if (OmptEnabled && result) { + if (result && lookupCallbackByName) { // Cache the fini function so that it can be invoked on exit - LibOmptTargetRTLFinalizer.registerRtl(result->finalize); + PluginFinalizer.registerRtl(result->finalize); // Invoke the provided init function with the lookup function maintained // in this library so that callbacks maintained by this library are // retrieved.
- result->initialize(OmptDeviceCallbacksTy::doLookup, - 0 /* initial_device_num */, nullptr /* tool_data */); + result->initialize(lookupCallbackByName, + /* initial_device_num */ 0, /* tool_data */ nullptr); } DP("OMPT: Leave ompt_libomptarget_connect\n"); } diff --git a/openmp/libomptarget/src/OmptInterface.h b/openmp/libomptarget/src/OmptInterface.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/OmptInterface.h @@ -0,0 +1,288 @@ +//===-------- OmptInterface.h - Target independent OpenMP target RTL ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declarations for OpenMP Tool callback dispatchers +// +//===----------------------------------------------------------------------===// + +#ifndef _OMPTARGET_OMPTINTERFACE_H +#define _OMPTARGET_OMPTINTERFACE_H + +#include <functional> +#include <tuple> + +#include "OmptCallback.h" +#include "omp-tools.h" + +#include "stdio.h" + +// If target OMPT support is compiled in +#ifdef OMPT_SUPPORT +#define OMPT_IF_BUILT(stmt) stmt +#else +#define OMPT_IF_BUILT(stmt) +#endif + +#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) + +/// Callbacks for target regions require task_data representing the +/// encountering task. +/// Callbacks for target regions and target data ops require +/// target_task_data representing the target task region. +typedef ompt_data_t *(*ompt_get_task_data_t)(); +typedef ompt_data_t *(*ompt_get_target_task_data_t)(); + +namespace llvm { +namespace omp { +namespace target { +namespace ompt { + +enum class InterfaceOpType { Target, TargetData, TargetSubmit }; + +/// Function pointers that will be used to track task_data and +/// target_task_data.
+static ompt_get_task_data_t ompt_get_task_data_fn; +static ompt_get_target_task_data_t ompt_get_target_task_data_fn; + +/// Used to maintain execution state for this thread +struct Interface { +public: + /// Top-level function for invoking callback before device data allocation + void beginTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size, + void *Code); + + /// Top-level function for invoking callback after device data allocation + void endTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size, + void *Code); + + /// Top-level function for invoking callback before data submit + void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + /// Top-level function for invoking callback after data submit + void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + /// Top-level function for invoking callback before device data deallocation + void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code); + + /// Top-level function for invoking callback after device data deallocation + void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code); + + /// Top-level function for invoking callback before data retrieve + void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + /// Top-level function for invoking callback after data retrieve + void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + /// Top-level function for invoking callback before kernel dispatch + void beginTargetSubmit(unsigned int NumTeams = 1); + + /// Top-level function for invoking callback after kernel dispatch + void endTargetSubmit(unsigned int NumTeams = 1); + + // Target region callbacks + + /// Top-level function for invoking callback before target enter data + /// construct + void beginTargetDataEnter(int64_t DeviceId, 
void *Code); + + /// Top-level function for invoking callback after target enter data + /// construct + void endTargetDataEnter(int64_t DeviceId, void *Code); + + /// Top-level function for invoking callback before target exit data + /// construct + void beginTargetDataExit(int64_t DeviceId, void *Code); + + /// Top-level function for invoking callback after target exit data + /// construct + void endTargetDataExit(int64_t DeviceId, void *Code); + + /// Top-level function for invoking callback before target update construct + void beginTargetUpdate(int64_t DeviceId, void *Code); + + /// Top-level function for invoking callback after target update construct + void endTargetUpdate(int64_t DeviceId, void *Code); + + /// Top-level function for invoking callback before target construct + void beginTarget(int64_t DeviceId, void *Code); + + /// Top-level function for invoking callback after target construct + void endTarget(int64_t DeviceId, void *Code); + + /// Setters for target region and target operations correlation ids + void setHostOpId(ompt_id_t id) { HostOpId = id; } + void setTargetDataValue(uint64_t val) { TargetData.value = val; } + void setTargetDataPtr(void *ptr) { TargetData.ptr = ptr; } + + /// Getters for target region and target operations correlation ids + uint64_t getTargetDataValue() { return TargetData.value; } + void *getTargetDataPtr() { return TargetData.ptr; } + ompt_id_t getHostOpId() { return HostOpId; } + +private: + /// Target operations id + ompt_id_t HostOpId = 0; + + /// Target region data + ompt_data_t TargetData = ompt_data_none; + + /// Task data representing the encountering task + ompt_data_t *TaskData = nullptr; + + /// Target task data representing the target task region + ompt_data_t *TargetTaskData = nullptr; + + /// Correlation id that is incremented with target operations + uint64_t TargetRegionOpId = 1; + + /// Used for marking begin of a data operation + void beginTargetDataOperation(); + + /// Used for marking end of a data 
operation + void endTargetDataOperation(); + + /// Used for marking begin of a target region + void beginTargetRegion(); + + /// Used for marking end of a target region + void endTargetRegion(); +}; + +/// Thread local state for target region and associated metadata +extern thread_local Interface RegionInterface; + +template <typename T, typename K, typename... Args> struct InterfaceRAII { + InterfaceRAII(T ty, K ki, Args... as) + : Type{ty}, Kind{ki}, + Arguments(std::make_tuple(as..., OMPT_GET_RETURN_ADDRESS(0))) { + begin(); + } + ~InterfaceRAII() { end(); } + + void begin() { + if (Type == InterfaceOpType::TargetSubmit) { + printf("InterfaceRAII :: begin :: TargetSubmit\n"); + std::invoke(&Interface::beginTargetSubmit, RegionInterface, + std::get<0>(Arguments)); + } else { + if (Type == InterfaceOpType::Target) { + if (Kind == ompt_target) { + printf("InterfaceRAII :: begin :: ompt_target\n"); + // RegionInterface.beginTarget(std::forward(Arguments)); + } else if ((Kind == ompt_target_enter_data)) { + printf("InterfaceRAII :: begin :: ompt_target_enter_data\n"); + // RegionInterface.beginTargetDataEnter(std::forward(Arguments)); + // WIP + /* + std::apply( + [](auto...
args) { + std::invoke(&Interface::beginTargetSubmit, RegionInterface, + args...); + }, + Arguments + ); + */ + } else if ((Kind == ompt_target_exit_data)) { + printf("InterfaceRAII :: begin :: ompt_target_exit_data\n"); + // RegionInterface.beginTargetDataExit(std::forward(Arguments)); + } else if ((Kind == ompt_target_update)) { + printf("InterfaceRAII :: begin :: ompt_target_update\n"); + // RegionInterface.beginTargetUpdate(std::forward(Arguments)); + } + } else if (Type == InterfaceOpType::TargetData) { + if (Kind == ompt_target_data_alloc) { + printf("InterfaceRAII :: begin :: ompt_target_data_alloc\n"); + // RegionInterface.beginTargetDataAlloc(std::forward(Arguments)); + } else if (Kind == ompt_target_data_transfer_to_device) { + printf("InterfaceRAII :: begin :: " + "ompt_target_data_transfer_to_device\n"); + // RegionInterface.beginTargetDataSubmit( + // std::forward(Arguments)); + /* + std::apply( + [](auto ...args) { + std::invoke(&Interface::beginTargetDataSubmit, RegionInterface, + args...); + }, + Arguments); + // */ + } else if (Kind == ompt_target_data_transfer_from_device) { + printf("InterfaceRAII :: begin :: " + "ompt_target_data_transfer_from_device\n"); + // RegionInterface.beginTargetDataRetrieve(std::forward(Arguments)); + } else if (Kind == ompt_target_data_delete) { + printf("InterfaceRAII :: begin :: " + "ompt_target_data_delete\n"); + // RegionInterface.beginTargetDataDelete(std::forward(Arguments)); + } + } + } + } + + void end() { + if (Type == InterfaceOpType::TargetSubmit) { + printf("InterfaceRAII :: end :: TargetSubmit\n"); + std::invoke(&Interface::endTargetSubmit, RegionInterface, + std::get<0>(Arguments)); + } else { + if (Type == InterfaceOpType::Target) { + if (Kind == ompt_target) { + printf("InterfaceRAII :: end :: ompt_target\n"); + // RegionInterface.endTarget(std::forward(Arguments)); + } else if (Kind == ompt_target_enter_data) { + printf("InterfaceRAII :: end :: ompt_target_enter_data\n"); + // WIP + /* +
std::invoke(&Interface::beginTargetSubmit, RegionInterface, + Arguments); + */ + } else if (Kind == ompt_target_exit_data) { + printf("InterfaceRAII :: end :: ompt_target_exit_data\n"); + // RegionInterface.endTargetDataExit(std::forward(Arguments)); + } else if (Kind == ompt_target_update) { + printf("InterfaceRAII :: end :: ompt_target_update\n"); + // RegionInterface.endTargetUpdate(std::forward(Arguments)); + } + } else if (Type == InterfaceOpType::TargetData) { + if (Kind == ompt_target_data_alloc) { + printf("InterfaceRAII :: end :: ompt_target_data_alloc\n"); + // RegionInterface.endTargetDataAlloc(std::forward(Arguments)); + } else if (Kind == ompt_target_data_transfer_to_device) { + printf("InterfaceRAII :: end :: " + "ompt_target_data_transfer_to_device\n"); + // RegionInterface.endTargetDataSubmit(std::forward(Arguments)); + } else if (Kind == ompt_target_data_transfer_from_device) { + printf("InterfaceRAII :: end :: " + "ompt_target_data_transfer_from_device\n"); + // RegionInterface.endTargetDataRetrieve(std::forward(Arguments)); + } else if (Kind == ompt_target_data_delete) { + printf("InterfaceRAII :: end :: " + "ompt_target_data_delete\n"); + // RegionInterface.endTargetDataDelete(std::forward(Arguments)); + } + } + } + } + +private: + const T Type; + const K Kind; + // Arguments used for function invocation with appended return address pointer + const std::tuple<Args..., void *> Arguments; +}; + +} // namespace ompt +} // namespace target +} // namespace omp +} // namespace llvm + +#endif // _OMPTARGET_OMPTINTERFACE_H diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "device.h" +#include "OmptCallback.h" +#include "OmptInterface.h" #include "omptarget.h" #include "private.h" #include "rtl.h" @@ -23,6 +25,78 @@ #include #include +using namespace
llvm::omp::target::ompt; + +/// RAII used to invoke callbacks before and after data ops +struct OmptInterfaceTargetDataOpRAII { + OmptInterfaceTargetDataOpRAII(int32_t Id, int64_t Sz, void *HPtr, void *TPtr, + ompt_target_data_op_t Op) + : DeviceId{Id}, Size{Sz}, CodePtr{nullptr}, HostPtr{HPtr}, TgtPtr{TPtr}, + TgtDataOp{Op} { + OMPT_IF_BUILT(beginOp()); + } + ~OmptInterfaceTargetDataOpRAII() { OMPT_IF_BUILT(endOp()); } + +private: + int32_t DeviceId; // Target device + int64_t Size; // Size of data transfer + void *CodePtr; // Return address + void *HostPtr; // Host data ptr + void *TgtPtr; // Target data ptr + ompt_target_data_op_t TgtDataOp; // Data transfer type + void beginOp() { + // if (!OmptEnabled) return; + CodePtr = OMPT_GET_RETURN_ADDRESS(0); + switch (TgtDataOp) { + case ompt_target_data_alloc: + case ompt_target_data_alloc_async: + RegionInterface.beginTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr); + break; + case ompt_target_data_delete: + case ompt_target_data_delete_async: + RegionInterface.beginTargetDataDelete(DeviceId, TgtPtr, CodePtr); + break; + case ompt_target_data_transfer_to_device: + case ompt_target_data_transfer_to_device_async: + RegionInterface.beginTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size, + CodePtr); + break; + case ompt_target_data_transfer_from_device: + case ompt_target_data_transfer_from_device_async: + RegionInterface.beginTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size, + CodePtr); + break; + default: + break; + } + } + void endOp() { + // if (!OmptEnabled) return; + switch (TgtDataOp) { + case ompt_target_data_alloc: + case ompt_target_data_alloc_async: + RegionInterface.endTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr); + break; + case ompt_target_data_delete: + case ompt_target_data_delete_async: + RegionInterface.endTargetDataDelete(DeviceId, TgtPtr, CodePtr); + break; + case ompt_target_data_transfer_to_device: + case ompt_target_data_transfer_to_device_async: + 
RegionInterface.endTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size, + CodePtr); + break; + case ompt_target_data_transfer_from_device: + case ompt_target_data_transfer_from_device_async: + RegionInterface.endTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size, + CodePtr); + break; + default: + break; + } + } +}; + int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const { // First, check if the user disabled atomic map transfer/malloc/dealloc. @@ -548,10 +622,17 @@ } void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { + /// RAII to establish tool anchors before and after data allocation + OmptInterfaceTargetDataOpRAII TgtDataAlloc( + RTLDeviceID, Size, HstPtr, nullptr /* TgtPtr */, ompt_target_data_alloc); return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); } int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) { + /// RAII to establish tool anchors before and after data deletion + OmptInterfaceTargetDataOpRAII TgtDataDelete( + RTLDeviceID, 0 /* Size */, nullptr /* HostPtr */, TgtPtrBegin, + ompt_target_data_delete); return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind); } @@ -583,6 +664,15 @@ Entry); } + /// RAII to establish tool anchors before and after data submit + OmptInterfaceTargetDataOpRAII TargetDataSubmitRAII( + RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin, + ompt_target_data_transfer_to_device); + + InterfaceRAII TargetDataSubmitRAII2( + InterfaceOpType::TargetData, ompt_target_data_transfer_to_device, + RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin); + if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, @@ -604,6 +694,11 @@ Entry); } + /// RAII to establish tool anchors before and after data retrieval + OmptInterfaceTargetDataOpRAII TargetDataRetrieve( + RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin, + 
ompt_target_data_transfer_from_device); + if (!RTL->data_retrieve_async || !RTL->synchronize) return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "OmptCallback.h" +#include "OmptInterface.h" #include "device.h" #include "omptarget.h" #include "private.h" @@ -24,6 +26,68 @@ #include #include +using namespace llvm::omp::target::ompt; + +// InterfaceRAII +// TargetDataBeginRAII(DeviceId); + +/// RAII used to invoke callbacks before and after target regions +struct OmptInterfaceTargetRAII { + OmptInterfaceTargetRAII(int64_t Id, ompt_target_t Op) + : CodePtr{nullptr}, DeviceId{Id}, TgtOp{Op} { + OMPT_IF_BUILT(beginOp()); + } + ~OmptInterfaceTargetRAII() { OMPT_IF_BUILT(endOp()); } + +private: + void *CodePtr; // Return address + int64_t DeviceId; // Target device + ompt_target_t TgtOp; // Target operation + void beginOp() { + // if (!OmptEnabled) return; + CodePtr = OMPT_GET_RETURN_ADDRESS(0); + switch (TgtOp) { + case ompt_target_enter_data: + case ompt_target_enter_data_nowait: + RegionInterface.beginTargetDataEnter(DeviceId, CodePtr); + break; + case ompt_target_exit_data: + case ompt_target_exit_data_nowait: + RegionInterface.beginTargetDataExit(DeviceId, CodePtr); + break; + case ompt_target_update: + case ompt_target_update_nowait: + RegionInterface.beginTargetUpdate(DeviceId, CodePtr); + break; + case ompt_target: + case ompt_target_nowait: + RegionInterface.beginTarget(DeviceId, CodePtr); + break; + } + } + void endOp() { + // if (!OmptEnabled) return; + switch (TgtOp) { + case ompt_target_enter_data: + case ompt_target_enter_data_nowait: + 
RegionInterface.endTargetDataEnter(DeviceId, CodePtr); + break; + case ompt_target_exit_data: + case ompt_target_exit_data_nowait: + RegionInterface.endTargetDataExit(DeviceId, CodePtr); + break; + case ompt_target_update: + case ompt_target_update_nowait: + RegionInterface.endTargetUpdate(DeviceId, CodePtr); + break; + case ompt_target: + case ompt_target_nowait: + RegionInterface.endTarget(DeviceId, CodePtr); + break; + } + } +}; + //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { @@ -123,6 +187,10 @@ map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); + /// RAII to establish tool anchors before and after data begin + OmptInterfaceTargetRAII TargetDataBeginRAII(DeviceId, ompt_target_enter_data); + InterfaceRAII TargetDataBeginRAII2(InterfaceOpType::Target, + ompt_target_enter_data, DeviceId); targetDataMapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataBegin, "Entering OpenMP data region", "begin"); @@ -149,6 +217,8 @@ map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); + /// RAII to establish tool anchors before and after data end + OmptInterfaceTargetRAII TargetDataEndRAII(DeviceId, ompt_target_exit_data); targetDataMapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataEnd, "Exiting OpenMP data region", "end"); @@ -172,6 +242,8 @@ map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); + /// RAII to establish tool anchors before and after data update + OmptInterfaceTargetRAII TargetDataUpdateRAII(DeviceId, ompt_target_update); targetDataMapper( Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataUpdate, "Updating OpenMP data", "update"); @@ -270,6 +342,8 @@ DeviceTy &Device = *PM->Devices[DeviceId]; TargetAsyncInfoTy TargetAsyncInfo(Device); + /// RAII to establish 
tool anchors before and after target region + OmptInterfaceTargetRAII TargetRAII(DeviceId, ompt_target); AsyncInfoTy &AsyncInfo = TargetAsyncInfo; int Rc = OFFLOAD_SUCCESS; @@ -336,7 +410,8 @@ return OMP_TGT_FAIL; } DeviceTy &Device = *PM->Devices[DeviceId]; - + /// RAII to establish tool anchors before and after target region + OmptInterfaceTargetRAII TargetRAII(DeviceId, ompt_target); AsyncInfoTy AsyncInfo(Device); int Rc = target_replay(Loc, Device, HostPtr, DeviceMemory, DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs, NumTeams, ThreadLimit, diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "omptarget.h" +#include "OmptCallback.h" +#include "OmptInterface.h" #include "device.h" #include "private.h" #include "rtl.h" @@ -23,6 +25,26 @@ #include using llvm::SmallVector; +using namespace llvm::omp::target::ompt; + +/// RAII used to invoke callbacks before and after kernel launch +struct OmptInterfaceTargetSubmitRAII { + OmptInterfaceTargetSubmitRAII(int32_t Teams) : NumTeams{Teams} { + OMPT_IF_BUILT(beginSubmit()); + } + ~OmptInterfaceTargetSubmitRAII() { OMPT_IF_BUILT(endSubmit()); } + +private: + int32_t NumTeams; // Number of teams + void beginSubmit() { + // if (!OmptEnabled) return; + RegionInterface.beginTargetSubmit(NumTeams); + } + void endSubmit() { + // if (!OmptEnabled) return; + RegionInterface.endTargetSubmit(NumTeams); + } +}; int AsyncInfoTy::synchronize() { int Result = OFFLOAD_SUCCESS; @@ -1678,6 +1700,16 @@ { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc); + +#ifdef OMPT_SUPPORT + /// RAII to establish tool anchors before and after kernel launch + uint32_t NumTeams = KernelArgs.NumTeams[0] * KernelArgs.NumTeams[1] * + 
KernelArgs.NumTeams[2]; + OmptInterfaceTargetSubmitRAII TargetSubmitRAII(NumTeams); + InterfaceRAII TargetSubmitRAII2(InterfaceOpType::TargetSubmit, + /* Kind */ 0, NumTeams); +#endif + Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(), KernelArgs, AsyncInfo); } diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -12,6 +12,7 @@ #include "llvm/Object/OffloadBinary.h" +#include "OmptCallback.h" #include "device.h" #include "private.h" #include "rtl.h" @@ -44,7 +45,7 @@ static char *ProfileTraceFile = nullptr; #ifdef OMPT_SUPPORT -extern void InitOmptLibomp(); +extern void ompt::connectLibrary(); #endif __attribute__((constructor(101))) void init() { @@ -69,10 +70,10 @@ if (ProfileTraceFile) timeTraceProfilerInitialize(500 /* us */, "libomptarget"); - #ifdef OMPT_SUPPORT - // Initialize OMPT first - InitOmptLibomp(); - #endif +#ifdef OMPT_SUPPORT + // Initialize OMPT first + ompt::connectLibrary(); +#endif PM->RTLs.loadRTLs(); PM->registerDelayedLibraries(); diff --git a/openmp/libomptarget/test/ompt/veccopy.c b/openmp/libomptarget/test/ompt/veccopy.c --- a/openmp/libomptarget/test/ompt/veccopy.c +++ b/openmp/libomptarget/test/ompt/veccopy.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -56,11 +53,29 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' -/// CHECK: Could not register callback 'ompt_callback_target_data_op' -/// CHECK: Could not register callback 'ompt_callback_target' 
-/// CHECK: Could not register callback 'ompt_callback_target_submit' +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 -/// CHECK: Success +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] 
host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c --- a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c +++ b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -59,11 +56,44 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' -/// CHECK: Could not register callback 'ompt_callback_target_data_op' -/// CHECK: Could not register callback 'ompt_callback_target' -/// CHECK: Could not register callback 'ompt_callback_target_submit' - -/// CHECK: Success +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] 
host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_emi.c b/openmp/libomptarget/test/ompt/veccopy_emi.c --- a/openmp/libomptarget/test/ompt/veccopy_emi.c +++ b/openmp/libomptarget/test/ompt/veccopy_emi.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: 
nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,11 +54,46 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' -/// CHECK: Could not register callback 'ompt_callback_target_data_op_emi' -/// CHECK: Could not register callback 'ompt_callback_target_emi' -/// CHECK: Could not register callback 'ompt_callback_target_submit_emi' - -/// CHECK: Success +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: 
endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=0 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_emi_map.c b/openmp/libomptarget/test/ompt/veccopy_emi_map.c --- a/openmp/libomptarget/test/ompt/veccopy_emi_map.c +++ b/openmp/libomptarget/test/ompt/veccopy_emi_map.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,12 +54,47 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' -/// CHECK: Could not register callback 'ompt_callback_target_data_op_emi' -/// CHECK: Could not register callback 'ompt_callback_target_emi' -/// CHECK: Could not register callback 'ompt_callback_target_submit_emi' -/// CHECK: Could not register callback 'ompt_callback_target_map_emi' - -/// CHECK: Success +/// CHECK: 0: Could not register 
callback 'ompt_callback_target_map_emi' +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=0 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp 
EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_map.c b/openmp/libomptarget/test/ompt/veccopy_map.c --- a/openmp/libomptarget/test/ompt/veccopy_map.c +++ b/openmp/libomptarget/test/ompt/veccopy_map.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -56,11 +53,31 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' -/// CHECK: Could not register callback 'ompt_callback_target_data_op' -/// CHECK: Could not register callback 'ompt_callback_target' -/// CHECK: Could not register callback 'ompt_callback_target_submit' -/// CHECK: Success +/// CHECK: 0: Could not register callback 'ompt_callback_target_map' +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK: 
Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 + +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_no_device_init.c b/openmp/libomptarget/test/ompt/veccopy_no_device_init.c --- a/openmp/libomptarget/test/ompt/veccopy_no_device_init.c +++ b/openmp/libomptarget/test/ompt/veccopy_no_device_init.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: 
nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,9 +54,29 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_load' -/// CHECK: Could not register callback 'ompt_callback_target_data_op' -/// CHECK: Could not register callback 'ompt_callback_target' -/// CHECK: Could not register callback 'ompt_callback_target_submit' +/// CHECK-NOT: Callback Init: +/// CHECK-NOT: Callback Load: +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 -/// CHECK: Success +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 
+/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK-NOT: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_wrong_return.c b/openmp/libomptarget/test/ompt/veccopy_wrong_return.c --- a/openmp/libomptarget/test/ompt/veccopy_wrong_return.c +++ b/openmp/libomptarget/test/ompt/veccopy_wrong_return.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,11 +54,29 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' -/// CHECK: Could not register callback 'ompt_callback_target_data_op' -/// CHECK: Could not register callback 'ompt_callback_target' -/// CHECK: Could not register callback 'ompt_callback_target_submit' +/// CHECK-NOT: Callback Init: +/// CHECK-NOT: Callback Load: +/// CHECK-NOT: Callback Target: 
target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 -/// CHECK: Success +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: Callback DataOp: 
target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK-NOT: Callback Fini diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt --- a/openmp/runtime/src/exports_so.txt +++ b/openmp/runtime/src/exports_so.txt @@ -25,8 +25,8 @@ # # OMPT API # - ompt_start_tool; # OMPT start interface - ompt_libomp_connect; # OMPT libomptarget interface + ompt_start_tool; # OMPT start interface + ompt_libomp_connect; # OMPT libomptarget interface ompc_*; # omp.h renames some standard functions to ompc_*. kmp_*; # Intel extensions. diff --git a/openmp/runtime/src/kmp_utility.cpp b/openmp/runtime/src/kmp_utility.cpp --- a/openmp/runtime/src/kmp_utility.cpp +++ b/openmp/runtime/src/kmp_utility.cpp @@ -407,6 +407,7 @@ #if !OMPT_SUPPORT extern "C" { typedef struct ompt_start_tool_result_t ompt_start_tool_result_t; + // Define symbols expected by VERSION script ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, const char *runtime_version) { diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -55,19 +55,18 @@ #define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_emi_implemented ompt_event_UNIMPLEMENTED - +#define ompt_callback_target_implemented ompt_event_MAY_ALWAYS +#define 
ompt_callback_target_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_emi_implemented ompt_event_MAY_ALWAYS #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_initialize_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_device_finalize_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_device_load_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_load_implemented ompt_event_MAY_ALWAYS #define ompt_callback_device_unload_implemented ompt_event_UNIMPLEMENTED /*---------------------------------------------------------------------------- diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -498,8 +498,8 @@ ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( ompt_thread_initial, __ompt_get_thread_data_internal()); } - ompt_data_t *task_data; - ompt_data_t *parallel_data; + ompt_data_t *task_data = nullptr; + ompt_data_t *parallel_data = nullptr; __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL); if (ompt_enabled.ompt_callback_implicit_task) { @@ -878,8 +878,23 @@ return NULL; } +static ompt_data_t *ompt_get_task_data() { return __ompt_get_task_data(); } + +static ompt_data_t *ompt_get_target_task_data() { + return __ompt_get_target_task_data(); +} + /// Lookup function to query libomp callbacks registered by the tool static ompt_interface_fn_t ompt_libomp_target_fn_lookup(const char *s) { +#define provide_fn(fn) \ + if (strcmp(s, #fn) == 0) \ + return 
(ompt_interface_fn_t)fn; + + provide_fn(ompt_get_callback); + provide_fn(ompt_get_task_data); + provide_fn(ompt_get_target_task_data); +#undef provide_fn + #define ompt_interface_fn(fn, type, code) \ if (strcmp(s, #fn) == 0) \ return (ompt_interface_fn_t)ompt_callbacks.ompt_callback(fn); @@ -887,7 +902,6 @@ FOREACH_OMPT_DEVICE_EVENT(ompt_interface_fn) FOREACH_OMPT_EMI_EVENT(ompt_interface_fn) FOREACH_OMPT_NOEMI_EVENT(ompt_interface_fn) - #undef ompt_interface_fn return (ompt_interface_fn_t)0; @@ -896,7 +910,7 @@ /// This function is called by the libomptarget connector to assign /// callbacks already registered with libomp. _OMP_EXTERN void ompt_libomp_connect(ompt_start_tool_result_t *result) { - OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Enter libomp_ompt_connect\n"); + OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Enter ompt_libomp_connect\n"); // Ensure libomp callbacks have been added if not already __ompt_force_initialization(); @@ -912,11 +926,11 @@ // functions can be extracted and assigned to the callbacks in // libomptarget result->initialize(ompt_libomp_target_fn_lookup, - 0 /* initial_device_num */, nullptr /* tool_data */); + /* initial_device_num */ 0, /* tool_data */ nullptr); // Track the object provided by libomptarget so that the finalizer can be // called during OMPT finalization libomptarget_ompt_result = result; } } - OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Exit libomp_ompt_connect\n"); + OMPT_VERBOSE_INIT_PRINT("libomp --> OMPT: Exit ompt_libomp_connect\n"); } diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -76,6 +76,7 @@ ompt_data_t thread_data; ompt_data_t task_data; /* stored here from implicit barrier-begin until implicit-task-end */ + ompt_data_t target_task_data; /* required by target support */ void *return_address; /* stored here on entry of runtime */ ompt_state_t state; ompt_wait_id_t wait_id; diff --git 
a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -37,6 +37,10 @@ ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size); +ompt_data_t *__ompt_get_task_data(); + +ompt_data_t *__ompt_get_target_task_data(); + ompt_task_info_t *__ompt_get_task_info_object(int depth); int __ompt_get_parallel_info_internal(int ancestor_level, @@ -61,12 +65,12 @@ * macros ****************************************************************************/ -#define OMPT_CUR_TASK_INFO(thr) (&(thr->th.th_current_task->ompt_task_info)) +#define OMPT_CUR_TASK_INFO(thr) (&((thr)->th.th_current_task->ompt_task_info)) #define OMPT_CUR_TASK_DATA(thr) \ - (&(thr->th.th_current_task->ompt_task_info.task_data)) -#define OMPT_CUR_TEAM_INFO(thr) (&(thr->th.th_team->t.ompt_team_info)) + (&((thr)->th.th_current_task->ompt_task_info.task_data)) +#define OMPT_CUR_TEAM_INFO(thr) (&((thr)->th.th_team->t.ompt_team_info)) #define OMPT_CUR_TEAM_DATA(thr) \ - (&(thr->th.th_team->t.ompt_team_info.parallel_data)) + (&((thr)->th.th_team->t.ompt_team_info.parallel_data)) #define OMPT_HAVE_WEAK_ATTRIBUTE KMP_HAVE_WEAK_ATTRIBUTE #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp --- a/openmp/runtime/src/ompt-specific.cpp +++ b/openmp/runtime/src/ompt-specific.cpp @@ -344,6 +344,19 @@ // task support //---------------------------------------------------------- +ompt_data_t *__ompt_get_task_data() { + kmp_info_t *thr = ompt_get_thread(); + ompt_data_t *task_data = thr ? OMPT_CUR_TASK_DATA(thr) : NULL; + return task_data; +} + +// Target task data slot of the calling thread; NULL when ompt_get_thread() +// yields no thread (same guard as __ompt_get_task_data above). +ompt_data_t *__ompt_get_target_task_data() { + kmp_info_t *thr = ompt_get_thread(); + return thr ? &thr->th.ompt_thread_info.target_task_data : NULL; +} + int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, ompt_frame_t **task_frame,