diff --git a/openmp/libomptarget/include/ompt_device_callbacks.h b/openmp/libomptarget/include/ompt_device_callbacks.h
--- a/openmp/libomptarget/include/ompt_device_callbacks.h
+++ b/openmp/libomptarget/include/ompt_device_callbacks.h
@@ -14,6 +14,9 @@
 #ifndef _OMPT_DEVICE_CALLBACKS_H
 #define _OMPT_DEVICE_CALLBACKS_H
 
+#include <atomic>
+#include <cstdint>
+
 #include <omp-tools.h>
 
 #define FOREACH_OMPT_TARGET_CALLBACK(macro)                                    \
@@ -21,9 +24,67 @@
   FOREACH_OMPT_NOEMI_EVENT(macro)                                              \
   FOREACH_OMPT_EMI_EVENT(macro)
 
+/// Internal representation for OMPT device
+class OmptDeviceTy {
+public:
+  OmptDeviceTy() { Enabled.store(false); }
+  /// Mark the device initialized; true only for the call that flips the flag
+  bool initialize() {
+    bool Old = false;
+    return Enabled.compare_exchange_strong(Old, true);
+  }
+  /// Mark the device finalized; true only for the call that flips the flag
+  bool finalize() {
+    bool Old = true;
+    return Enabled.compare_exchange_strong(Old, false);
+  }
+
+private:
+  std::atomic<bool> Enabled;
+};
+
 /// Internal representation for OMPT device callback functions.
 class OmptDeviceCallbacksTy {
 public:
+  /// Invoked when a device is initialized
+  void OmptCallbackDeviceInitialize(int DeviceNum, const char *Type) {
+    if (ompt_callback_device_initialize_fn) {
+      OmptDeviceTy *Device = lookupDevice(DeviceNum);
+      if (Device && Device->initialize()) {
+        ompt_callback_device_initialize_fn(
+            DeviceNum, Type, (ompt_device_t *)Device, doLookup, Documentation);
+      }
+    }
+  }
+
+  /// Invoked when a device is finalized
+  void OmptCallbackDeviceFinalize(int DeviceNum) {
+    if (ompt_callback_device_finalize_fn) {
+      OmptDeviceTy *Device = lookupDevice(DeviceNum);
+      if (Device && Device->finalize()) {
+        ompt_callback_device_finalize_fn(DeviceNum);
+      }
+    }
+  }
+
+  /// Invoked when a device image is loaded
+  void OmptCallbackDeviceLoad(int DeviceNum, const char *Filename,
+                              int64_t OffsetInFile, void *VmaInFile,
+                              size_t Bytes, void *HostAddr, void *DeviceAddr,
+                              uint64_t ModuleId) {
+    if (ompt_callback_device_load_fn) {
+      ompt_callback_device_load_fn(DeviceNum, Filename, OffsetInFile, VmaInFile,
+                                   Bytes, HostAddr, DeviceAddr, ModuleId);
+    }
+  }
+
+  /// Invoked when a device image is unloaded
+  void OmptCallbackDeviceUnload(int DeviceNum, uint64_t ModuleId) {
+    if (ompt_callback_device_unload_fn) {
+      ompt_callback_device_unload_fn(DeviceNum, ModuleId);
+    }
+  }
+
   /// Initialize the enabled flag and all the callbacks
   void init() {
     Enabled = false;
@@ -32,6 +93,12 @@
 #undef initName
   }
 
+  bool isEnabled() { return Enabled; }
+  /// Devices must be prepared before use
+  void prepareDevices(int NumDevices) { resize(NumDevices); }
+  /// Devices must be released after use
+  void releaseDevices() { release(); }
+
   /// Used to register callbacks. \p Lookup is used to query a given callback
   /// by name and the result is assigned to the corresponding callback function.
   void registerCallbacks(ompt_function_lookup_t Lookup) {
@@ -66,6 +133,15 @@
 #define DeclareName(Name, Type, Code) Name##_t Name##_fn;
   FOREACH_OMPT_TARGET_CALLBACK(DeclareName)
 #undef DeclareName
+
+  /// Allocate devices
+  static void resize(int NumDevices);
+  /// Deallocate devices
+  static void release();
+  /// Find a device given its id; null when the id is out of range
+  static OmptDeviceTy *lookupDevice(int DeviceNum);
+  /// Documentation based on omp-tools
+  static const char *Documentation;
 };
 
 /// Device callbacks object for the library that performs the instantiation
diff --git a/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/ompt_callback.cpp
@@ -21,6 +21,36 @@
 /// Object maintaining all the callbacks in the plugin
 OmptDeviceCallbacksTy ompt_device_callbacks;
 
+const char *OmptDeviceCallbacksTy::Documentation = nullptr;
+
+/// Array denoting the devices; null until resize() has allocated it
+static OmptDeviceTy *devices = nullptr;
+/// Number of entries in \c devices; bounds lookupDevice()
+static int numDevices = 0;
+
+void OmptDeviceCallbacksTy::resize(int NumDevices) {
+  // Drop any earlier array so a repeated call cannot leak it
+  release();
+  if (NumDevices <= 0)
+    return;
+  devices = new OmptDeviceTy[NumDevices];
+  numDevices = NumDevices;
+}
+
+void OmptDeviceCallbacksTy::release() {
+  delete[] devices;
+  // Reset so a second release() or a late lookupDevice() is harmless
+  devices = nullptr;
+  numDevices = 0;
+}
+
+OmptDeviceTy *OmptDeviceCallbacksTy::lookupDevice(int DeviceNum) {
+  // Callers test the result for null, so report unknown ids that way
+  if (DeviceNum < 0 || DeviceNum >= numDevices)
+    return nullptr;
+  return &devices[DeviceNum];
+}
+
 /// Lookup function used for querying callback functions maintained
 /// by the plugin
 ompt_interface_fn_t
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -38,6 +38,18 @@
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
+#ifdef OMPT_SUPPORT
+#include "ompt_device_callbacks.h"
+#define OMPT_IF_BUILT_AND_ENABLED(stmts)                                       \
+  do {                                                                         \
+    if (ompt_device_callbacks.isEnabled()) {                                   \
+      stmts;                                                                   \
+    }                                                                          \
+  } while (0)
+#else
+#define OMPT_IF_BUILT_AND_ENABLED(stmts)
+#endif
+
 // hostrpc interface, FIXME: consider moving to its own include these are
 // statically linked into amdgpu/plugin if present from hostrpc_services.a,
 // linked as --whole-archive to override the weak symbols that are used to
@@ -963,6 +975,29 @@
     return res;
   }
 
+#ifdef OMPT_SUPPORT
+  void doOmptDeviceLoad(int32_t DeviceId, const char *FileName,
+                        int64_t OffsetInFile, void *VmaInFile, size_t Bytes,
+                        void *HostAddr, void *DeviceAddr, uint64_t ModuleId) {
+    ompt_device_callbacks.OmptCallbackDeviceLoad(
+        DeviceId, FileName, OffsetInFile, VmaInFile, Bytes, HostAddr,
+        DeviceAddr, ModuleId);
+  }
+
+  void doOmptDeviceInitialize(int32_t DeviceId, const char *InfoName) {
+    std::string OmptGPUType("AMD ");
+    OmptGPUType += InfoName;
+    ompt_device_callbacks.OmptCallbackDeviceInitialize(DeviceId,
+                                                       OmptGPUType.c_str());
+  }
+
+  void doOmptDeviceFinalize() {
+    for (int i = 0; i < NumberOfDevices; i++)
+      ompt_device_callbacks.OmptCallbackDeviceFinalize(i);
+    ompt_device_callbacks.releaseDevices();
+  }
+#endif
+
   RTLDeviceInfoTy() {
     DP("Start initializing " GETNAME(TARGET_NAME) "\n");
 
@@ -1038,6 +1073,10 @@
       return;
     }
 
+#ifdef OMPT_SUPPORT
+    ompt_device_callbacks.prepareDevices(NumberOfDevices);
+#endif
+
     for (int i = 0; i < NumberOfDevices; i++) {
       uint32_t queue_size = 0;
       {
@@ -1090,6 +1129,13 @@
       // Then none of these can have been set up and they can't be torn down
       return;
     }
+
+    OMPT_IF_BUILT_AND_ENABLED(doOmptDeviceFinalize());
+#ifdef OMPT_SUPPORT
+    // prepareDevices() ran even if no tool is attached, so always release
+    ompt_device_callbacks.releaseDevices();
+#endif
+
     // Run destructors on types that use HSA before
     // impl_finalize removes access to it
     deviceStateStore.clear();
@@ -2074,6 +2120,9 @@
      DeviceInfo.GroupsPerDevice[device_id] *
          DeviceInfo.ThreadsPerGroup[device_id]);
 
+  OMPT_IF_BUILT_AND_ENABLED(
+      DeviceInfo.doOmptDeviceInitialize(device_id, GetInfoName));
+
   return OFFLOAD_SUCCESS;
 }
 
@@ -2165,6 +2214,10 @@
 
     DP("AMDGPU module successfully loaded!\n");
 
+    OMPT_IF_BUILT_AND_ENABLED(DeviceInfo.doOmptDeviceLoad(
+        device_id, nullptr /* FileName */, 0 /* OffsetInFile */,
+        nullptr /* VmaInFile */, img_size, image->ImageStart,
+        nullptr /* DeviceAddr */, 0 /* FIXME ModuleId */));
     {
       // the device_State array is either large value in bss or a void* that
       // needs to be assigned to a pointer to an array of size device_state_bytes
diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h
--- a/openmp/runtime/src/ompt-event-specific.h
+++ b/openmp/runtime/src/ompt-event-specific.h
@@ -64,10 +64,10 @@
 
 #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED
-#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_device_initialize_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_device_finalize_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_callback_device_load_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_device_load_implemented ompt_event_MAY_ALWAYS
 #define ompt_callback_device_unload_implemented ompt_event_UNIMPLEMENTED
 
 /*----------------------------------------------------------------------------