// Patch overview (reviewer commentary; sits before the first diff header):
// Adds three scope guards to libomptarget. Each guard's constructor and
// destructor call a begin*/end* member of ompt_interface, wrapped in
// OMPT_IF_BUILT and skipped when ompt_enabled is false:
//   - OmptInterfaceTargetDataOpRAII (device.cpp): used in allocData,
//     deleteData and the data submit/retrieve hunks.
//   - OmptInterfaceTargetRAII (interface.cpp): used around the targetDataMapper
//     calls for begin/end/update and in the target / target_replay hunks.
//   - OmptInterfaceTargetSubmitRAII (omptarget.cpp): used around
//     Device.launchKernel.
// Also defines OMPT_GET_RETURN_ADDRESS as __builtin_return_address and changes
// the target callback flags in ompt-event-specific.h from
// ompt_event_UNIMPLEMENTED to ompt_event_MAY_ALWAYS.
//
// NOTE(review): OMPT_GET_RETURN_ADDRESS(0) is evaluated inside beginOp();
// unless beginOp() and the constructor are inlined this presumably yields an
// address inside the guard, not the caller of the runtime entry point - confirm.
// NOTE(review): the guards declare no copy operations, so they are copyable and
// a copy would run the end callback a second time; consider deleting them.
// NOTE(review): the submit/retrieve guards always pass the non-async
// ompt_target_data_transfer_* kind, even when the *_async RTL entry is the one
// called - confirm this is intended.
// NOTE(review): only the kernel-launch guard is wrapped in #ifdef OMPT_SUPPORT,
// and its team count is the sum of KernelArgs.NumTeams[0..2] - confirm both.
// NOTE(review): this copy appears to have lost its line breaks and the <...>
// targets of the context #include lines; restore them before applying.
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "device.h" +#include "ompt_callback.h" #include "omptarget.h" #include "private.h" #include "rtl.h" @@ -22,6 +23,78 @@ #include #include +/// RAII guard: runs beginOp()/endOp() callbacks around one data operation +struct OmptInterfaceTargetDataOpRAII { + OmptInterfaceTargetDataOpRAII(int32_t Id, int64_t Sz, void *HPtr, void *TPtr, + ompt_target_data_op_t Op) + : DeviceId{Id}, Size{Sz}, CodePtr{nullptr}, HostPtr{HPtr}, TgtPtr{TPtr}, + TgtDataOp{Op} { + OMPT_IF_BUILT(beginOp()); + } + ~OmptInterfaceTargetDataOpRAII() { OMPT_IF_BUILT(endOp()); } + +private: + int32_t DeviceId; // Target device + int64_t Size; // Size of data transfer + void *CodePtr; // Return address, set in beginOp() + void *HostPtr; // Host data ptr + void *TgtPtr; // Target data ptr + ompt_target_data_op_t TgtDataOp; // Data operation kind + void beginOp() { + if (!ompt_enabled) + return; + CodePtr = OMPT_GET_RETURN_ADDRESS(0); + switch (TgtDataOp) { + case ompt_target_data_alloc: + case ompt_target_data_alloc_async: + ompt_interface.beginTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr); + break; + case ompt_target_data_delete: + case ompt_target_data_delete_async: + ompt_interface.beginTargetDataDelete(DeviceId, TgtPtr, CodePtr); + break; + case ompt_target_data_transfer_to_device: + case ompt_target_data_transfer_to_device_async: + ompt_interface.beginTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size, + CodePtr); + break; + case ompt_target_data_transfer_from_device: + case ompt_target_data_transfer_from_device_async: + ompt_interface.beginTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size, + CodePtr); + break; + default: + break; + } + } + void endOp() { + if (!ompt_enabled) + return; + switch (TgtDataOp) { + case ompt_target_data_alloc: + case 
ompt_target_data_alloc_async: + ompt_interface.endTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr); + break; + case ompt_target_data_delete: + case ompt_target_data_delete_async: + ompt_interface.endTargetDataDelete(DeviceId, TgtPtr, CodePtr); + break; + case ompt_target_data_transfer_to_device: + case ompt_target_data_transfer_to_device_async: + ompt_interface.endTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size, + CodePtr); + break; + case ompt_target_data_transfer_from_device: + case ompt_target_data_transfer_from_device_async: + ompt_interface.endTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size, + CodePtr); + break; + default: + break; + } + } +}; + int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const { // First, check if the user disabled atomic map transfer/malloc/dealloc. @@ -542,10 +615,17 @@ } void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { + /// RAII to establish tool anchors before and after data allocation + OmptInterfaceTargetDataOpRAII TgtDataAlloc( + RTLDeviceID, Size, HstPtr, nullptr /* TgtPtr */, ompt_target_data_alloc); return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); } int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) { + /// RAII to establish tool anchors before and after data deletion + OmptInterfaceTargetDataOpRAII TgtDataDelete( + RTLDeviceID, 0 /* Size */, nullptr /* HostPtr */, TgtPtrBegin, + ompt_target_data_delete); return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind); } @@ -565,6 +645,11 @@ : "unknown"); } + /// RAII to establish tool anchors before and after data submit + OmptInterfaceTargetDataOpRAII TargetDataSubmitRAII( + RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin, + ompt_target_data_transfer_to_device); + if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, @@ -586,6 +671,11 @@ : "unknown"); } 
+ /// RAII to establish tool anchors before and after data retrieval + OmptInterfaceTargetDataOpRAII TargetDataRetrieve( + RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin, + ompt_target_data_transfer_from_device); + if (!RTL->data_retrieve_async || !RTL->synchronize) return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "device.h" +#include "ompt_callback.h" #include "omptarget.h" #include "private.h" #include "rtl.h" @@ -24,6 +25,65 @@ #include #include +/// RAII guard: runs beginOp()/endOp() callbacks around one target operation +struct OmptInterfaceTargetRAII { + OmptInterfaceTargetRAII(int64_t Id, ompt_target_t Op) + : CodePtr{nullptr}, DeviceId{Id}, TgtOp{Op} { + OMPT_IF_BUILT(beginOp()); + } + ~OmptInterfaceTargetRAII() { OMPT_IF_BUILT(endOp()); } + +private: + void *CodePtr; // Return address, set in beginOp() + int64_t DeviceId; // Target device + ompt_target_t TgtOp; // Target operation + void beginOp() { + if (!ompt_enabled) + return; + CodePtr = OMPT_GET_RETURN_ADDRESS(0); + switch (TgtOp) { + case ompt_target_enter_data: + case ompt_target_enter_data_nowait: + ompt_interface.beginTargetDataEnter(DeviceId, CodePtr); + break; + case ompt_target_exit_data: + case ompt_target_exit_data_nowait: + ompt_interface.beginTargetDataExit(DeviceId, CodePtr); + break; + case ompt_target_update: + case ompt_target_update_nowait: + ompt_interface.beginTargetUpdate(DeviceId, CodePtr); + break; + case ompt_target: + case ompt_target_nowait: + ompt_interface.beginTarget(DeviceId, CodePtr); + break; + } + } + void endOp() { + if (!ompt_enabled) + return; + switch (TgtOp) { + case ompt_target_enter_data: + case 
ompt_target_enter_data_nowait: + ompt_interface.endTargetDataEnter(DeviceId, CodePtr); + break; + case ompt_target_exit_data: + case ompt_target_exit_data_nowait: + ompt_interface.endTargetDataExit(DeviceId, CodePtr); + break; + case ompt_target_update: + case ompt_target_update_nowait: + ompt_interface.endTargetUpdate(DeviceId, CodePtr); + break; + case ompt_target: + case ompt_target_nowait: + ompt_interface.endTarget(DeviceId, CodePtr); + break; + } + } +}; + //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { @@ -123,6 +183,9 @@ map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); + /// RAII to establish tool anchors before and after data begin + OmptInterfaceTargetRAII TargetDataBeginRAII(DeviceId, + ompt_target_enter_data); targetDataMapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataBegin, "Entering OpenMP data region", "begin"); @@ -149,6 +212,8 @@ map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); + /// RAII to establish tool anchors before and after data end + OmptInterfaceTargetRAII TargetDataEndRAII(DeviceId, ompt_target_exit_data); targetDataMapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataEnd, "Exiting OpenMP data region", "end"); @@ -172,6 +237,8 @@ map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); + /// RAII to establish tool anchors before and after data update + OmptInterfaceTargetRAII TargetDataUpdateRAII(DeviceId, ompt_target_update); targetDataMapper( Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataUpdate, "Updating OpenMP data", "update"); @@ -270,6 +337,8 @@ DeviceTy &Device = *PM->Devices[DeviceId]; TargetAsyncInfoTy TargetAsyncInfo(Device); + /// RAII to establish tool anchors before and after target region + OmptInterfaceTargetRAII 
TargetRAII(DeviceId, ompt_target); AsyncInfoTy &AsyncInfo = TargetAsyncInfo; int Rc = OFFLOAD_SUCCESS; @@ -336,7 +405,8 @@ return OMP_TGT_FAIL; } DeviceTy &Device = *PM->Devices[DeviceId]; - + /// RAII to establish tool anchors before and after target region + OmptInterfaceTargetRAII TargetRAII(DeviceId, ompt_target); AsyncInfoTy AsyncInfo(Device); int Rc = target_replay(Loc, Device, HostPtr, DeviceMemory, DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs, NumTeams, ThreadLimit, diff --git a/openmp/libomptarget/src/ompt_callback.h b/openmp/libomptarget/src/ompt_callback.h --- a/openmp/libomptarget/src/ompt_callback.h +++ b/openmp/libomptarget/src/ompt_callback.h @@ -20,6 +20,8 @@ #define OMPT_IF_BUILT(stmt) #endif +#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) + #include "omp-tools.h" /// Used to maintain execution state for this thread diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -13,6 +13,7 @@ #include "omptarget.h" #include "device.h" +#include "ompt_callback.h" #include "private.h" #include "rtl.h" @@ -22,6 +23,27 @@ using llvm::SmallVector; +/// RAII guard: runs beginSubmit()/endSubmit() around a kernel launch +struct OmptInterfaceTargetSubmitRAII { + OmptInterfaceTargetSubmitRAII(int32_t Teams) : NumTeams{Teams} { + OMPT_IF_BUILT(beginSubmit()); + } + ~OmptInterfaceTargetSubmitRAII() { OMPT_IF_BUILT(endSubmit()); } + +private: + int32_t NumTeams; // Team count passed to the callbacks + void beginSubmit() { + if (!ompt_enabled) + return; + ompt_interface.beginTargetSubmit(NumTeams); + } + void endSubmit() { + if (!ompt_enabled) + return; + ompt_interface.endTargetSubmit(NumTeams); + } +}; + int AsyncInfoTy::synchronize() { int Result = OFFLOAD_SUCCESS; if (!isQueueEmpty()) { @@ -1694,6 +1716,14 @@ { assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!"); TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel 
Launch", Loc); + +#ifdef OMPT_SUPPORT + /// RAII to establish tool anchors before and after kernel launch + int32_t NumTeams = KernelArgs.NumTeams[0] + KernelArgs.NumTeams[1] + + KernelArgs.NumTeams[2]; + OmptInterfaceTargetSubmitRAII TargetSubmitRAII(NumTeams); +#endif + Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(), KernelArgs, AsyncInfo); } diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -55,13 +55,12 @@ #define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS -#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_data_op_emi_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED -#define ompt_callback_target_submit_emi_implemented ompt_event_UNIMPLEMENTED - +#define ompt_callback_target_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_data_op_emi_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_implemented ompt_event_MAY_ALWAYS +#define ompt_callback_target_submit_emi_implemented ompt_event_MAY_ALWAYS #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS #define ompt_callback_device_initialize_implemented ompt_event_MAY_ALWAYS