diff --git a/openmp/libomptarget/include/interop.h b/openmp/libomptarget/include/interop.h --- a/openmp/libomptarget/include/interop.h +++ b/openmp/libomptarget/include/interop.h @@ -67,7 +67,8 @@ omp_ifr_sycl = 4, omp_ifr_hip = 5, omp_ifr_level_zero = 6, - omp_ifr_last = 7 + omp_ifr_amdhsa = 7, + omp_ifr_last = 8 } omp_interop_fr_t; typedef void *omp_interop_t; @@ -118,7 +119,9 @@ typedef enum omp_interop_backend_type_t { // reserve 0 - omp_interop_backend_type_cuda_1 = 1, + omp_interop_backend_type_cuda = 1, + omp_interop_backend_type_amdhsa = 7, + omp_interop_backend_type_invalid = 8 } omp_interop_backend_type_t; typedef enum kmp_interop_type_t { @@ -129,26 +132,31 @@ } kmp_interop_type_t; typedef enum omp_foreign_runtime_ids { + invalid = 0, cuda = 1, cuda_driver = 2, opencl = 3, sycl = 4, hip = 5, level_zero = 6, + amdhsa = 7 } omp_foreign_runtime_ids_t; /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. - omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type) - : interop_type(interop_type), device_id(device_id) {} + omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type, + omp_foreign_runtime_ids_t vendor_id, + intptr_t backend_type_id) + : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id), + backend_type_id(backend_type_id) {} const char *err_str = nullptr; __tgt_async_info *async_info = nullptr; __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_foreign_runtime_ids_t vendor_id; + intptr_t backend_type_id; } omp_interop_val_t; #ifdef __cplusplus diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -433,6 +433,7 @@ void __tgt_set_info_flag(uint32_t); int __tgt_print_device_info(int64_t DeviceId); + #ifdef __cplusplus } #endif diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -14,6 +14,7 @@ #ifndef _OMPTARGETPLUGIN_H_ #define _OMPTARGETPLUGIN_H_ +#include #include #ifdef __cplusplus @@ -172,6 +173,9 @@ // Print the device information void __tgt_rtl_print_device_info(int32_t ID); +// Set the runtime related information for interop object +int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr); + // Event related interfaces. It is expected to use the interfaces in the // following way: // 1) Create an event on the target device (__tgt_rtl_create_event). diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h --- a/openmp/libomptarget/include/rtl.h +++ b/openmp/libomptarget/include/rtl.h @@ -13,13 +13,12 @@ #ifndef _OMPTARGET_RTL_H #define _OMPTARGET_RTL_H +#include "interop.h" #include "omptarget.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DynamicLibrary.h" -#include "omptarget.h" - #include #include #include @@ -58,6 +57,7 @@ typedef int32_t (*register_lib_ty)(__tgt_bin_desc *); typedef int32_t(supports_empty_images_ty)(); typedef void(print_device_info_ty)(int32_t); + typedef void(set_interop_info_ty)(omp_interop_val_t *); typedef void(set_info_flag_ty)(uint32_t); typedef int32_t(create_event_ty)(int32_t, void **); typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *); @@ -112,6 +112,7 @@ supports_empty_images_ty *supports_empty_images = nullptr; set_info_flag_ty *set_info_flag = nullptr; print_device_info_ty *print_device_info = nullptr; + set_interop_info_ty *set_interop_info = nullptr; create_event_ty *create_event = nullptr; record_event_ty *record_event = nullptr; wait_event_ty *wait_event = nullptr; diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2397,6 +2397,19 @@ return Queues[Current % Queues.size()]; } + virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override { + InterOpPtr->vendor_id = amdhsa; + InterOpPtr->backend_type_id = omp_interop_backend_type_amdhsa; + + __tgt_device_info *DevInfo = &InterOpPtr->device_info; + DevInfo->Context = nullptr; + if (!DevInfo->Device) { + DevInfo->Device = &Agent; + } + + return Plugin::success(); + } + private: using AMDGPUStreamRef = AMDGPUResourceRef; using AMDGPUEventRef = AMDGPUResourceRef; diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -26,6 +26,7 @@ #include "MemoryManager.h" #include "RPC.h" #include "Utilities.h" +#include "interop.h" #include "omptarget.h" #ifdef OMPT_SUPPORT @@ -764,6 +765,10 @@ /// Get the RPC server running on this device. RPCServerTy *getRPCServer() const { return RPCServer; } + virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) { + return Error::success(); + } + private: /// Register offload entry for global variable. Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage, @@ -957,7 +962,6 @@ /// Indicate whether the plugin supports empty images. virtual bool supportsEmptyImages() const { return false; } -protected: /// Indicate whether a device id is valid. bool isValidDeviceId(int32_t DeviceId) const { return (DeviceId >= 0 && DeviceId < getNumDevices()); diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -1645,6 +1645,21 @@ return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_set_interop_info(omp_interop_val_t *InterOpPtr) { + assert(InterOpPtr && "Interop object is allocated"); + int32_t DevId = InterOpPtr->device_id; + + assert(Plugin::get().isValidDeviceId(DevId) && "Device Id is valid"); + if (auto Err = Plugin::get().getDevice(DevId).setInteropInfo(InterOpPtr)) { + REPORT("Failure to determine the OpenMP interop object info for Device Id " + "%i\n", + DevId); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} + #ifdef __cplusplus } #endif diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -850,6 +850,10 @@ /// Returns the clock frequency for the given NVPTX device. uint64_t getClockFrequency() const override { return 1000000000; } + virtual Error setInteropInfo(omp_interop_val_t *InterOpPtr) override { + return Plugin::success(); + } + private: using CUDAStreamManagerTy = GenericDeviceResourceManagerTy; using CUDAEventManagerTy = GenericDeviceResourceManagerTy; diff --git a/openmp/libomptarget/src/interop.cpp b/openmp/libomptarget/src/interop.cpp --- a/openmp/libomptarget/src/interop.cpp +++ b/openmp/libomptarget/src/interop.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "interop.h" +#include "omptargetplugin.h" #include "private.h" namespace { @@ -53,8 +54,21 @@ return ("hip"); case level_zero: return ("level_zero"); + case amdhsa: + return ("amdhsa"); + default: + return ("unknown"); + } +} + +const char *getBackendIdToStr(intptr_t BackendId) { + switch (BackendId) { + case omp_interop_backend_type_cuda: + return "cuda backend"; + case omp_interop_backend_type_amdhsa: + return "amdhsa backend"; } - return ("unknown"); + return "unknown backend"; } template @@ -88,6 +102,8 @@ : "device+context"; case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); + case omp_ipr_fr_name: + return getBackendIdToStr(InteropVal.backend_type_id); default: getTypeMismatch(Property, Err); return nullptr; @@ -199,19 +215,27 @@ NoaliasDepList); } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); + DeviceTy &Device = *PM->Devices[DeviceId]; + if (!Device.RTL || !Device.RTL->set_interop_info) { + InteropPtr = omp_interop_none; + + return; + } + + // Create interop value object + InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid, + omp_interop_backend_type_invalid); if (!deviceIsReady(DeviceId)) { + // If the coresponding device is not ready yet, the interop object is + // initialized with invalid flag InteropPtr->err_str = "Device not ready!"; + return; } - DeviceTy &Device = *PM->Devices[DeviceId]; - if (!Device.RTL || !Device.RTL->init_device_info || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; - } + // Retrieve the target specific interop value object + Device.RTL->set_interop_info(InteropPtr); + if (InteropType == kmp_interop_type_tasksync) { if (!Device.RTL || !Device.RTL->init_async_info || Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -225,6 +225,8 @@ DynLibrary->getAddressOfSymbol("__tgt_rtl_set_info_flag"); *((void **)&RTL.print_device_info) = DynLibrary->getAddressOfSymbol("__tgt_rtl_print_device_info"); + *((void **)&RTL.set_interop_info) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_set_interop_info"); *((void **)&RTL.create_event) = DynLibrary->getAddressOfSymbol("__tgt_rtl_create_event"); *((void **)&RTL.record_event) = diff --git a/openmp/libomptarget/test/api/omp_interop_amdgpu.c b/openmp/libomptarget/test/api/omp_interop_amdgpu.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/api/omp_interop_amdgpu.c @@ -0,0 +1,87 @@ +// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 +// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa +// REQUIRES: amdgcn-amd-amdhsa + +#include +#include +#include + +#define N 16384 + +void vectorSet(int n, double s, double *x) { + for (int i = 0; i < n; ++i) + x[i] = s * (i + 1); +} + +void vectorCopy(int n, double *x, double *y) { + for (int i = 0; i < n; ++i) + y[i] = x[i]; +} + +void vectorScale(int n, double s, double *x) { + for (int i = 0; i < n; ++i) + x[i] = s * x[i]; +} + +int main() { + const double ScaleFactor = 2.0; + double x[N], y[N]; + omp_interop_t SyncObj = omp_interop_none; + int DeviceNum = omp_get_default_device(); + +#pragma omp target nowait depend(out : x[0 : N]) map(from : x[0 : N]) \ + device(DeviceNum) + vectorSet(N, 1.0, x); + +#pragma omp task depend(out : y[0 : N]) + vectorSet(N, -1.0, y); + + // Get SyncObject for synchronization +#pragma omp interop init(targetsync : SyncObj) device(DeviceNum) \ + depend(in : x[0 : N]) depend(inout : y[0 : N]) + + int ForeignContextId = (int)omp_get_interop_int(SyncObj, omp_ipr_fr_id, NULL); + char *ForeignContextName = + (char *)omp_get_interop_str(SyncObj, omp_ipr_fr_name, NULL); + + if (SyncObj != omp_interop_none && ForeignContextId == omp_ifr_amdhsa) { + printf("OpenMP working with %s runtime to execute async memcpy.\n", + ForeignContextName); + int Status; + omp_get_interop_ptr(SyncObj, omp_ipr_targetsync, &Status); + + if (Status != omp_irc_success) { + fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n", + ForeignContextName, Status); + if (Status == omp_irc_no_value) + fprintf(stderr, "Parameters valid, but no meaningful value available."); + exit(1); + } + + vectorCopy(N, x, y); + } else { + // Execute as OpenMP offload + printf("Notice: Offloading myCopy to perform memcpy.\n"); + +#pragma omp target depend(inout : y[0 : N]) depend(in : x[0 : N]) \ + nowait map(to : x[0 : N]) map(tofrom : y[0 : N]) device(DeviceNum) + vectorCopy(N, x, y); + } + + // This also ensures foreign tasks complete +#pragma omp interop destroy(SyncObj) nowait depend(out : y[0 : N]) + +#pragma omp target depend(inout : x[0 : N]) + vectorScale(N, ScaleFactor, x); + +#pragma omp taskwait + + printf("(1 : 16384) %f:%f\n", y[0], y[N - 1]); + printf("(2 : 32768) %f:%f\n", x[0], x[N - 1]); + + return 0; +} + +// ToDo: Add meaningful checks; the following is a placeholder. + +// CHECK: OpenMP working with amdhsa backend runtime to execute async memcpy diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -189,7 +189,8 @@ omp_ifr_sycl = 4, omp_ifr_hip = 5, omp_ifr_level_zero = 6, - omp_ifr_last = 7 + omp_ifr_amdhsa = 7, + omp_ifr_last = 8 } omp_interop_fr_t; typedef void * omp_interop_t; diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var --- a/openmp/runtime/src/include/omp_lib.h.var +++ b/openmp/runtime/src/include/omp_lib.h.var @@ -261,8 +261,10 @@ parameter(omp_ifr_hip=5) integer(kind=omp_interop_fr_kind)omp_ifr_level_zero parameter(omp_ifr_level_zero=6) + integer(kind=omp_interop_fr_kind)omp_ifr_amdhsa + parameter(omp_ifr_amdhsa=7) integer(kind=omp_interop_fr_kind)omp_ifr_last - parameter(omp_ifr_last=7) + parameter(omp_ifr_last=8) integer(kind=omp_interop_kind)omp_interop_none parameter(omp_interop_none=0) diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -1542,7 +1542,8 @@ omp_ifr_sycl = 4, omp_ifr_hip = 5, omp_ifr_level_zero = 6, - omp_ifr_last = 7 + omp_ifr_amdhsa = 7, + omp_ifr_last = 8 } omp_interop_fr_t; typedef void *omp_interop_t;