diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "device.h"
+#include "ompt_callback.h"
 #include "omptarget.h"
 #include "private.h"
 #include "rtl.h"
@@ -22,6 +23,97 @@
 #include
 #include
 
+/// Scope guard that reports a target data operation to the OMPT interface: the
+/// "begin" callback matching the operation fires on construction and the "end"
+/// callback on destruction. Operations without a case below are ignored.
+struct OmptInterfaceTargetDataOpRAII {
+  /// \param Id      Target device the operation is issued on.
+  /// \param Sz      Size of the allocation or transfer.
+  /// \param HPtr    Host data pointer; null when the caller has none.
+  /// \param TPtr    Target data pointer; null when the caller has none.
+  /// \param Op      Kind of data operation being bracketed.
+  /// \param RetAddr Code location handed to the callbacks. Callers evaluate
+  ///                OMPT_GET_RETURN_ADDRESS(0) in their own frame: expanded
+  ///                inside this guard, the builtin would describe the guard's
+  ///                frame whenever the guard is not inlined.
+  OmptInterfaceTargetDataOpRAII(int32_t Id, int64_t Sz, void *HPtr, void *TPtr,
+                                ompt_target_data_op_t Op, void *RetAddr)
+      : DeviceId{Id}, Size{Sz}, CodePtr{RetAddr}, HostPtr{HPtr}, TgtPtr{TPtr},
+        TgtDataOp{Op} {
+    OMPT_IF_BUILT(beginOp());
+  }
+  ~OmptInterfaceTargetDataOpRAII() { OMPT_IF_BUILT(endOp()); }
+
+  // Non-copyable: a copy would fire the "end" callback a second time.
+  OmptInterfaceTargetDataOpRAII(const OmptInterfaceTargetDataOpRAII &) = delete;
+  OmptInterfaceTargetDataOpRAII &
+  operator=(const OmptInterfaceTargetDataOpRAII &) = delete;
+
+private:
+  int32_t DeviceId;                // Target device
+  int64_t Size;                    // Size of data transfer
+  void *CodePtr;                   // Return address
+  void *HostPtr;                   // Host data ptr
+  void *TgtPtr;                    // Target data ptr
+  ompt_target_data_op_t TgtDataOp; // Data transfer type
+
+  /// Fires the "begin" callback for TgtDataOp unless ompt_enabled is false.
+  void beginOp() {
+    if (!ompt_enabled)
+      return;
+    switch (TgtDataOp) {
+    case ompt_target_data_alloc:
+    case ompt_target_data_alloc_async:
+      ompt_interface.beginTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr);
+      break;
+    case ompt_target_data_delete:
+    case ompt_target_data_delete_async:
+      ompt_interface.beginTargetDataDelete(DeviceId, TgtPtr, CodePtr);
+      break;
+    case ompt_target_data_transfer_to_device:
+    case ompt_target_data_transfer_to_device_async:
+      ompt_interface.beginTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size,
+                                           CodePtr);
+      break;
+    case ompt_target_data_transfer_from_device:
+    case ompt_target_data_transfer_from_device_async:
+      ompt_interface.beginTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size,
+                                             CodePtr);
+      break;
+    default:
+      break;
+    }
+  }
+
+  /// Fires the "end" callback for TgtDataOp unless ompt_enabled is false.
+  void endOp() {
+    if (!ompt_enabled)
+      return;
+    switch (TgtDataOp) {
+    case ompt_target_data_alloc:
+    case ompt_target_data_alloc_async:
+      ompt_interface.endTargetDataAlloc(DeviceId, HostPtr, Size, CodePtr);
+      break;
+    case ompt_target_data_delete:
+    case ompt_target_data_delete_async:
+      ompt_interface.endTargetDataDelete(DeviceId, TgtPtr, CodePtr);
+      break;
+    case ompt_target_data_transfer_to_device:
+    case ompt_target_data_transfer_to_device_async:
+      ompt_interface.endTargetDataSubmit(DeviceId, TgtPtr, HostPtr, Size,
+                                         CodePtr);
+      break;
+    case ompt_target_data_transfer_from_device:
+    case ompt_target_data_transfer_from_device_async:
+      ompt_interface.endTargetDataRetrieve(DeviceId, HostPtr, TgtPtr, Size,
+                                           CodePtr);
+      break;
+    default:
+      break;
+    }
+  }
+};
+
 int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
                                             AsyncInfoTy &AsyncInfo) const {
   // First, check if the user disabled atomic map transfer/malloc/dealloc.
@@ -517,10 +609,18 @@
 }
 
 void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
+  /// RAII to establish tool anchors before and after data allocation
+  OmptInterfaceTargetDataOpRAII TgtDataAlloc(
+      RTLDeviceID, Size, HstPtr, nullptr /* TgtPtr */, ompt_target_data_alloc,
+      OMPT_GET_RETURN_ADDRESS(0));
   return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
 }
 
 int32_t DeviceTy::deleteData(void *TgtPtrBegin) {
+  /// RAII to establish tool anchors before and after data deletion
+  OmptInterfaceTargetDataOpRAII TgtDataDelete(
+      RTLDeviceID, 0 /* Size */, nullptr /* HostPtr */, TgtPtrBegin,
+      ompt_target_data_delete, OMPT_GET_RETURN_ADDRESS(0));
   return RTL->data_delete(RTLDeviceID, TgtPtrBegin);
 }
 
@@ -540,6 +640,11 @@
                                 : "unknown");
   }
 
+  /// RAII to establish tool anchors before and after data submit
+  OmptInterfaceTargetDataOpRAII TargetDataSubmitRAII(
+      RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin,
+      ompt_target_data_transfer_to_device, OMPT_GET_RETURN_ADDRESS(0));
+
   if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
     return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
   else
@@ -562,6 +667,11 @@
                                 : "unknown");
   }
 
+  /// RAII to establish tool anchors before and after data retrieval
+  OmptInterfaceTargetDataOpRAII TargetDataRetrieve(
+      RTLDeviceID, Size, HstPtrBegin, TgtPtrBegin,
+      ompt_target_data_transfer_from_device, OMPT_GET_RETURN_ADDRESS(0));
+
   if (!RTL->data_retrieve_async || !RTL->synchronize)
     return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
   else
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "device.h"
+#include "ompt_callback.h"
 #include "omptarget.h"
 #include "private.h"
 #include "rtl.h"
@@ -21,6 +22,80 @@
 #include
 #include
 
+/// Scope guard that reports a target construct to the OMPT interface: the
+/// "begin" callback matching the construct fires on construction and the "end"
+/// callback on destruction.
+struct OmptInterfaceTargetRAII {
+  /// \param Id      Target device the construct is issued on.
+  /// \param Op      Kind of target construct being bracketed.
+  /// \param RetAddr Code location handed to the callbacks. Callers evaluate
+  ///                OMPT_GET_RETURN_ADDRESS(0) in their own frame: expanded
+  ///                inside this guard, the builtin would describe the guard's
+  ///                frame whenever the guard is not inlined.
+  OmptInterfaceTargetRAII(int64_t Id, ompt_target_t Op, void *RetAddr)
+      : CodePtr{RetAddr}, DeviceId{Id}, TgtOp{Op} {
+    OMPT_IF_BUILT(beginOp());
+  }
+  ~OmptInterfaceTargetRAII() { OMPT_IF_BUILT(endOp()); }
+
+  // Non-copyable: a copy would fire the "end" callback a second time.
+  OmptInterfaceTargetRAII(const OmptInterfaceTargetRAII &) = delete;
+  OmptInterfaceTargetRAII &operator=(const OmptInterfaceTargetRAII &) = delete;
+
+private:
+  void *CodePtr;       // Return address
+  int64_t DeviceId;    // Target device
+  ompt_target_t TgtOp; // Target operation
+
+  /// Fires the "begin" callback for TgtOp unless ompt_enabled is false.
+  void beginOp() {
+    if (!ompt_enabled)
+      return;
+    switch (TgtOp) {
+    case ompt_target_enter_data:
+    case ompt_target_enter_data_nowait:
+      ompt_interface.beginTargetDataEnter(DeviceId, CodePtr);
+      break;
+    case ompt_target_exit_data:
+    case ompt_target_exit_data_nowait:
+      ompt_interface.beginTargetDataExit(DeviceId, CodePtr);
+      break;
+    case ompt_target_update:
+    case ompt_target_update_nowait:
+      ompt_interface.beginTargetUpdate(DeviceId, CodePtr);
+      break;
+    case ompt_target:
+    case ompt_target_nowait:
+      ompt_interface.beginTarget(DeviceId, CodePtr);
+      break;
+    }
+  }
+
+  /// Fires the "end" callback for TgtOp unless ompt_enabled is false.
+  void endOp() {
+    if (!ompt_enabled)
+      return;
+    switch (TgtOp) {
+    case ompt_target_enter_data:
+    case ompt_target_enter_data_nowait:
+      ompt_interface.endTargetDataEnter(DeviceId, CodePtr);
+      break;
+    case ompt_target_exit_data:
+    case ompt_target_exit_data_nowait:
+      ompt_interface.endTargetDataExit(DeviceId, CodePtr);
+      break;
+    case ompt_target_update:
+    case ompt_target_update_nowait:
+      ompt_interface.endTargetUpdate(DeviceId, CodePtr);
+      break;
+    case ompt_target:
+    case ompt_target_nowait:
+      ompt_interface.endTarget(DeviceId, CodePtr);
+      break;
+    }
+  }
+};
+
 ////////////////////////////////////////////////////////////////////////////////
 /// adds requires flags
 EXTERN void __tgt_register_requires(int64_t flags) {
@@ -112,6 +187,9 @@
   }
 #endif
 
+  /// RAII to establish tool anchors before and after data begin
+  OmptInterfaceTargetRAII TargetDataBeginRAII(device_id, ompt_target_enter_data,
+                                              OMPT_GET_RETURN_ADDRESS(0));
   AsyncInfoTy AsyncInfo(Device);
   int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes,
                            arg_types, arg_names, arg_mappers, AsyncInfo);
@@ -181,6 +259,9 @@
   }
 #endif
 
+  /// RAII to establish tool anchors before and after data end
+  OmptInterfaceTargetRAII TargetDataEndRAII(device_id, ompt_target_exit_data,
+                                            OMPT_GET_RETURN_ADDRESS(0));
   AsyncInfoTy AsyncInfo(Device);
   int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes,
                          arg_types, arg_names, arg_mappers, AsyncInfo);
@@ -236,6 +317,10 @@
                          arg_names, "Updating OpenMP data");
 
   DeviceTy &Device = *PM->Devices[device_id];
+
+  /// RAII to establish tool anchors before and after data update
+  OmptInterfaceTargetRAII TargetDataUpdateRAII(device_id, ompt_target_update,
+                                               OMPT_GET_RETURN_ADDRESS(0));
   AsyncInfoTy AsyncInfo(Device);
   int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes,
                             arg_types, arg_names, arg_mappers, AsyncInfo);
@@ -300,6 +385,10 @@
 #endif
 
   DeviceTy &Device = *PM->Devices[device_id];
+
+  /// RAII to establish tool anchors before and after target region
+  OmptInterfaceTargetRAII TargetRAII(device_id, ompt_target,
+                                     OMPT_GET_RETURN_ADDRESS(0));
   AsyncInfoTy AsyncInfo(Device);
   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
                   arg_types, arg_names, arg_mappers, 0, 0, false /*team*/,
@@ -374,6 +463,10 @@
 #endif
 
   DeviceTy &Device = *PM->Devices[device_id];
+
+  /// RAII to establish tool anchors before and after target region
+  OmptInterfaceTargetRAII TargetRAII(device_id, ompt_target,
+                                     OMPT_GET_RETURN_ADDRESS(0));
   AsyncInfoTy AsyncInfo(Device);
   int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes,
                   arg_types, arg_names, arg_mappers, team_num, thread_limit,
diff --git a/openmp/libomptarget/src/ompt_callback.h b/openmp/libomptarget/src/ompt_callback.h
--- a/openmp/libomptarget/src/ompt_callback.h
+++ b/openmp/libomptarget/src/ompt_callback.h
@@ -20,6 +20,11 @@
 #define OMPT_IF_BUILT(stmt)
 #endif
 
+/// Yields the return address of the function that expands it (level 0) or of
+/// one of its callers (higher levels). Expand it directly in the function
+/// whose caller should be reported, not in a helper that function calls.
+#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
+
 #include "omp-tools.h"
 
 /// Used to maintain execution state for this thread
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -13,6 +13,7 @@
 
 #include "omptarget.h"
 #include "device.h"
+#include "ompt_callback.h"
 #include "private.h"
 #include "rtl.h"
 
@@ -20,6 +21,39 @@
 #include
 #include
 
+/// Scope guard that reports a kernel launch to the OMPT interface: the "begin"
+/// submit callback fires on construction and the "end" submit callback on
+/// destruction.
+struct OmptInterfaceTargetSubmitRAII {
+  /// \param Teams Number of teams forwarded to both submit callbacks.
+  explicit OmptInterfaceTargetSubmitRAII(int32_t Teams) : NumTeams{Teams} {
+    OMPT_IF_BUILT(beginSubmit());
+  }
+  ~OmptInterfaceTargetSubmitRAII() { OMPT_IF_BUILT(endSubmit()); }
+
+  // Non-copyable: a copy would fire the "end" callback a second time.
+  OmptInterfaceTargetSubmitRAII(const OmptInterfaceTargetSubmitRAII &) = delete;
+  OmptInterfaceTargetSubmitRAII &
+  operator=(const OmptInterfaceTargetSubmitRAII &) = delete;
+
+private:
+  int32_t NumTeams; // Number of teams
+
+  /// Fires the "begin" submit callback unless ompt_enabled is false.
+  void beginSubmit() {
+    if (!ompt_enabled)
+      return;
+    ompt_interface.beginTargetSubmit(NumTeams);
+  }
+
+  /// Fires the "end" submit callback unless ompt_enabled is false.
+  void endSubmit() {
+    if (!ompt_enabled)
+      return;
+    ompt_interface.endTargetSubmit(NumTeams);
+  }
+};
+
 int AsyncInfoTy::synchronize() {
   int Result = OFFLOAD_SUCCESS;
   if (AsyncInfo.Queue) {
@@ -1544,6 +1578,10 @@
   {
     TIMESCOPE_WITH_NAME_AND_IDENT(
         IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", loc);
+
+    /// RAII to establish tool anchors before and after kernel launch
+    OmptInterfaceTargetSubmitRAII TargetSubmitRAII(TeamNum);
+
     if (IsTeamConstruct)
       Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
                                  TgtArgs.size(), TeamNum, ThreadLimit,
diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h
--- a/openmp/runtime/src/ompt-event-specific.h
+++ b/openmp/runtime/src/ompt-event-specific.h
@@ -55,13 +55,13 @@
 
 #define ompt_callback_implicit_task_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_callback_target_implemented ompt_event_UNIMPLEMENTED
-#define ompt_callback_target_emi_implemented ompt_event_UNIMPLEMENTED
-#define ompt_callback_target_data_op_implemented ompt_event_UNIMPLEMENTED
-#define ompt_callback_target_data_op_emi_implemented ompt_event_UNIMPLEMENTED
-#define ompt_callback_target_submit_implemented ompt_event_UNIMPLEMENTED
-#define ompt_callback_target_submit_emi_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_target_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_target_emi_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_target_data_op_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_target_data_op_emi_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_target_submit_implemented ompt_event_MAY_ALWAYS
+#define ompt_callback_target_submit_emi_implemented ompt_event_MAY_ALWAYS
 
 #define ompt_callback_control_tool_implemented ompt_event_MAY_ALWAYS
 
 #define ompt_callback_device_initialize_implemented ompt_event_MAY_ALWAYS