diff --git a/openmp/libomptarget/include/ompt_device_callbacks.h b/openmp/libomptarget/include/ompt_device_callbacks.h --- a/openmp/libomptarget/include/ompt_device_callbacks.h +++ b/openmp/libomptarget/include/ompt_device_callbacks.h @@ -17,7 +17,9 @@ #ifdef OMPT_SUPPORT #include "Debug.h" +#include #include +#include #define DEBUG_PREFIX "OMPT" @@ -26,9 +28,66 @@ FOREACH_OMPT_NOEMI_EVENT(macro) \ FOREACH_OMPT_EMI_EVENT(macro) +/// Internal representation for OMPT device +class OmptDeviceTy { +public: + OmptDeviceTy() { Enabled.store(false); } + bool initialize() { + bool Old = false; + return Enabled.compare_exchange_strong(Old, true); + } + bool finalize() { + bool Old = true; + return Enabled.compare_exchange_strong(Old, false); + } + +private: + std::atomic Enabled; +}; + /// Internal representation for OMPT device callback functions. class OmptDeviceCallbacksTy { public: + /// Invoked when a device is initialized + void OmptCallbackDeviceInitialize(int DeviceNum, const char *Type) { + if (ompt_callback_device_initialize_fn) { + OmptDeviceTy *Device = lookupDevice(DeviceNum); + if (Device && Device->initialize()) { + ompt_callback_device_initialize_fn( + DeviceNum, Type, reinterpret_cast(Device), + doLookup, Documentation); + } + } + } + + /// Invoked when a device is finalized + void OmptCallbackDeviceFinalize(int DeviceNum) { + if (ompt_callback_device_finalize_fn) { + OmptDeviceTy *Device = lookupDevice(DeviceNum); + if (Device && Device->finalize()) { + ompt_callback_device_finalize_fn(DeviceNum); + } + } + } + + /// Invoked when a device image is loaded + void OmptCallbackDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, void *VmaInFile, + size_t Bytes, void *HostAddr, void *DeviceAddr, + uint64_t ModuleId) { + if (ompt_callback_device_load_fn) { + ompt_callback_device_load_fn(DeviceNum, Filename, OffsetInFile, VmaInFile, + Bytes, HostAddr, DeviceAddr, ModuleId); + } + } + + /// Invoked when a device image is unloaded + void OmptCallbackDeviceUnload(int DeviceNum, uint64_t ModuleId) { + if (ompt_callback_device_unload_fn) { + ompt_callback_device_unload_fn(DeviceNum, ModuleId); + } + } + /// Initialize the enabled flag and all the callbacks void init() { Enabled = false; @@ -37,6 +96,12 @@ #undef initName } + bool isEnabled() { return Enabled; } + /// Devices must be prepared before use + void prepareDevices(int NumDevices) { allocate(NumDevices); } + /// Devices must be released after use + void releaseDevices() { release(); } + /// Used to register callbacks. \p Lookup is used to query a given callback /// by name and the result is assigned to the corresponding callback function. void registerCallbacks(ompt_function_lookup_t Lookup) { @@ -65,12 +130,23 @@ private: /// Set to true if callbacks for this library have been initialized - bool Enabled; + bool Enabled = false; + /// Number of allocated devices + static int NumAllocatedDevices; /// Callback functions #define DeclareName(Name, Type, Code) Name##_t Name##_fn; FOREACH_OMPT_TARGET_CALLBACK(DeclareName) #undef DeclareName + + /// Allocate devices + static void allocate(int NumDevices); + /// Deallocate devices + static void release(); + /// Find a device given its id + static OmptDeviceTy *lookupDevice(int DeviceNum); + /// Documentation based on omp-tools + static const char *Documentation; }; /// Device callbacks object for the library that performs the instantiation diff --git a/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp b/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp --- a/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/OMPT/OmptCallback.cpp @@ -23,6 +23,29 @@ /// Object maintaining all the callbacks in the plugin OmptDeviceCallbacksTy OmptDeviceCallbacks; +const char *OmptDeviceCallbacksTy::Documentation = 0; +int OmptDeviceCallbacksTy::NumAllocatedDevices = 0; + +/// Array denoting the devices +static OmptDeviceTy *Devices = nullptr; + +void OmptDeviceCallbacksTy::allocate(int NumDevices) { + if (!Devices && (NumDevices > 0)) { + NumAllocatedDevices = NumDevices; + Devices = new OmptDeviceTy[NumDevices]; + } else { + DP("OMPT: Device allocation failed\n"); + } +} + +void OmptDeviceCallbacksTy::release() { delete[] Devices; } + +OmptDeviceTy *OmptDeviceCallbacksTy::lookupDevice(int DeviceNum) { + return ((DeviceNum >= 0) && (DeviceNum < NumAllocatedDevices)) + ? &Devices[DeviceNum] + : nullptr; +} + /// Lookup function used for querying callback functions maintained /// by the plugin ompt_interface_fn_t diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -509,7 +509,7 @@ /// Deinitialize the device and free all its resources. After this call, the /// device is no longer considered ready, so no queries or modifications are /// allowed. - Error deinit(); + Error deinit(GenericPluginTy &Plugin); virtual Error deinitImpl() = 0; /// Load the binary image into the device and return the target table. diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -13,6 +13,7 @@ #include "GlobalHandler.h" #include "JIT.h" #include "elf_common.h" +#include "ompt_device_callbacks.h" #include "omptarget.h" #include "omptargetplugin.h" @@ -29,6 +30,10 @@ using namespace target; using namespace plugin; +#ifdef OMPT_SUPPORT +extern void OmptCallbackInit(); +#endif + GenericPluginTy *Plugin::SpecificPlugin = nullptr; // TODO: Fix any thread safety issues for multi-threaded kernel recording. @@ -357,6 +362,12 @@ if (auto Err = initImpl(Plugin)) return Err; +#ifdef OMPT_SUPPORT + OmptDeviceCallbacks.prepareDevices(Plugin.getNumDevices()); + OmptDeviceCallbacks.OmptCallbackDeviceInitialize( + DeviceId, getComputeUnitKind().c_str()); +#endif + // Read and reinitialize the envars that depend on the device initialization. // Notice these two envars may change the stack size and heap size of the // device, so they need the device properly initialized. @@ -398,8 +409,8 @@ return Plugin::success(); } -Error GenericDeviceTy::deinit() { - // Delete the memory manager before deinitilizing the device. Otherwise, +Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { + // Delete the memory manager before deinitializing the device. Otherwise, // we may delete device allocations after the device is deinitialized. if (MemoryManager) delete MemoryManager; @@ -408,9 +419,14 @@ if (RecordReplay.isRecordingOrReplaying()) RecordReplay.deinit(); +#ifdef OMPT_SUPPORT + for (int i = 0; i < Plugin.getNumDevices(); ++i) + OmptDeviceCallbacks.OmptCallbackDeviceFinalize(i); + OmptDeviceCallbacks.releaseDevices(); +#endif + return deinitImpl(); } - Expected<__tgt_target_table *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, const __tgt_device_image *InputTgtImage) { @@ -448,6 +464,15 @@ if (auto Err = registerOffloadEntries(*Image)) return std::move(Err); +#ifdef OMPT_SUPPORT + size_t Bytes = getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart); + OmptDeviceCallbacks.OmptCallbackDeviceLoad( + DeviceId, nullptr /* FileName */, 0 /* File Offset */, + nullptr /* VmaInFile */, Bytes /* ImgSize */, + InputTgtImage->ImageStart /* HostAddr */, nullptr /* DeviceAddr */, + 0 /* FIXME: ModuleId */); +#endif + // Return the pointer to the table of entries. return Image->getOffloadEntryTable(); } @@ -1018,6 +1043,10 @@ if (!NumDevicesOrErr) return NumDevicesOrErr.takeError(); +#ifdef OMPT_SUPPORT + OmptCallbackInit(); +#endif + NumDevices = *NumDevicesOrErr; if (NumDevices == 0) return Plugin::success(); @@ -1069,7 +1098,7 @@ return Plugin::success(); // Deinitialize the device and release its resources. - if (auto Err = Devices[DeviceId]->deinit()) + if (auto Err = Devices[DeviceId]->deinit(*this)) return Err; // Delete the device and invalidate its reference. @@ -1489,6 +1518,9 @@ return OFFLOAD_FAIL; } + // OMPT_IF_BUILT_AND_ENABLED( + // DeviceInfo().doOmptDeviceInitialize(DeviceId, GetInfoName)); + return OFFLOAD_SUCCESS; } diff --git a/openmp/libomptarget/test/ompt/veccopy.c b/openmp/libomptarget/test/ompt/veccopy.c --- a/openmp/libomptarget/test/ompt/veccopy.c +++ b/openmp/libomptarget/test/ompt/veccopy.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -56,11 +53,10 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' /// CHECK: Could not register callback 'ompt_callback_target_data_op' /// CHECK: Could not register callback 'ompt_callback_target' /// CHECK: Could not register callback 'ompt_callback_target_submit' -/// CHECK: Success +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c --- a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c +++ b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -59,11 +56,10 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' /// CHECK: Could not register callback 'ompt_callback_target_data_op' /// CHECK: Could not register callback 'ompt_callback_target' /// CHECK: Could not register callback 'ompt_callback_target_submit' -/// CHECK: Success +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_emi.c b/openmp/libomptarget/test/ompt/veccopy_emi.c --- a/openmp/libomptarget/test/ompt/veccopy_emi.c +++ b/openmp/libomptarget/test/ompt/veccopy_emi.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,11 +54,10 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' /// CHECK: Could not register callback 'ompt_callback_target_data_op_emi' /// CHECK: Could not register callback 'ompt_callback_target_emi' /// CHECK: Could not register callback 'ompt_callback_target_submit_emi' -/// CHECK: Success +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_emi_map.c b/openmp/libomptarget/test/ompt/veccopy_emi_map.c --- a/openmp/libomptarget/test/ompt/veccopy_emi_map.c +++ b/openmp/libomptarget/test/ompt/veccopy_emi_map.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,12 +54,11 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' /// CHECK: Could not register callback 'ompt_callback_target_data_op_emi' /// CHECK: Could not register callback 'ompt_callback_target_emi' /// CHECK: Could not register callback 'ompt_callback_target_submit_emi' /// CHECK: Could not register callback 'ompt_callback_target_map_emi' -/// CHECK: Success +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_map.c b/openmp/libomptarget/test/ompt/veccopy_map.c --- a/openmp/libomptarget/test/ompt/veccopy_map.c +++ b/openmp/libomptarget/test/ompt/veccopy_map.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -56,11 +53,10 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' /// CHECK: Could not register callback 'ompt_callback_target_data_op' /// CHECK: Could not register callback 'ompt_callback_target' /// CHECK: Could not register callback 'ompt_callback_target_submit' -/// CHECK: Success +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_no_device_init.c b/openmp/libomptarget/test/ompt/veccopy_no_device_init.c --- a/openmp/libomptarget/test/ompt/veccopy_no_device_init.c +++ b/openmp/libomptarget/test/ompt/veccopy_no_device_init.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,9 +54,10 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_load' /// CHECK: Could not register callback 'ompt_callback_target_data_op' /// CHECK: Could not register callback 'ompt_callback_target' /// CHECK: Could not register callback 'ompt_callback_target_submit' -/// CHECK: Success +/// CHECK-NOT: Callback Init: +/// CHECK-NOT: Callback Load: +/// CHECK-NOT: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_wrong_return.c b/openmp/libomptarget/test/ompt/veccopy_wrong_return.c --- a/openmp/libomptarget/test/ompt/veccopy_wrong_return.c +++ b/openmp/libomptarget/test/ompt/veccopy_wrong_return.c @@ -1,8 +1,5 @@ // RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: ompt -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-oldDriver -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO // UNSUPPORTED: x86_64-pc-linux-gnu // UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver // UNSUPPORTED: x86_64-pc-linux-gnu-LTO @@ -57,11 +54,10 @@ return rc; } -/// CHECK: Could not register callback 'ompt_callback_device_initialize' -/// CHECK: Could not register callback 'ompt_callback_device_finalize' -/// CHECK: Could not register callback 'ompt_callback_device_load' /// CHECK: Could not register callback 'ompt_callback_target_data_op' /// CHECK: Could not register callback 'ompt_callback_target' /// CHECK: Could not register callback 'ompt_callback_target_submit' -/// CHECK: Success +/// CHECK-NOT: Callback Init: +/// CHECK-NOT: Callback Load: +/// CHECK-NOT: Callback Fini: diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -64,10 +64,10 @@ #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_device_initialize_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_device_finalize_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_initialize_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_device_finalize_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_device_load_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_device_load_implemented ompt_event_MAY_ALWAYS #define ompt_callback_device_unload_implemented ompt_event_UNIMPLEMENTED /*----------------------------------------------------------------------------