Index: openmp/libomptarget/include/interop.h =================================================================== --- openmp/libomptarget/include/interop.h +++ openmp/libomptarget/include/interop.h @@ -67,7 +67,8 @@ omp_ifr_sycl = 4, omp_ifr_hip = 5, omp_ifr_level_zero = 6, - omp_ifr_last = 7 + omp_ifr_amdhsa = 7, + omp_ifr_last = 8 } omp_interop_fr_t; typedef void *omp_interop_t; @@ -143,6 +144,10 @@ typedef enum omp_interop_backend_type_t { // reserve 0 omp_interop_backend_type_cuda_1 = 1, + + omp_interop_backend_type_amdhsa_1 = 5, + + omp_interop_backend_type_invalid_1 = 10 } omp_interop_backend_type_t; typedef enum kmp_interop_type_t { @@ -153,26 +158,31 @@ } kmp_interop_type_t; typedef enum omp_foreign_runtime_ids { + invalid = 0, cuda = 1, cuda_driver = 2, opencl = 3, sycl = 4, hip = 5, level_zero = 6, + amdhsa = 7 } omp_foreign_runtime_ids_t; /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. - omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type) - : interop_type(interop_type), device_id(device_id) {} + omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type, + omp_foreign_runtime_ids_t vendor_id, + intptr_t backend_type_id) + : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id), + backend_type_id(backend_type_id) {} const char *err_str = nullptr; __tgt_async_info *async_info = nullptr; __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_foreign_runtime_ids_t vendor_id; + intptr_t backend_type_id; } omp_interop_val_t; #ifdef __cplusplus Index: openmp/libomptarget/include/omptarget.h =================================================================== --- openmp/libomptarget/include/omptarget.h +++ openmp/libomptarget/include/omptarget.h @@ -350,6 +350,7 @@ void __tgt_set_info_flag(uint32_t); int __tgt_print_device_info(int64_t DeviceId); + #ifdef __cplusplus } #endif Index: openmp/libomptarget/include/omptargetplugin.h =================================================================== --- openmp/libomptarget/include/omptargetplugin.h +++ openmp/libomptarget/include/omptargetplugin.h @@ -14,6 +14,7 @@ #ifndef _OMPTARGETPLUGIN_H_ #define _OMPTARGETPLUGIN_H_ +#include #include #ifdef __cplusplus @@ -162,6 +163,9 @@ // Print the device information void __tgt_rtl_print_device_info(int32_t ID); +// Set the runtime related information for interop object +void __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr); + // Event related interfaces. It is expected to use the interfaces in the // following way: // 1) Create an event on the target device (__tgt_rtl_create_event). @@ -189,8 +193,6 @@ // } int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr); -int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr, - const char **ErrStr); #ifdef __cplusplus } Index: openmp/libomptarget/include/rtl.h =================================================================== --- openmp/libomptarget/include/rtl.h +++ openmp/libomptarget/include/rtl.h @@ -14,12 +14,11 @@ #define _OMPTARGET_RTL_H #include "omptarget.h" +#include "interop.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DynamicLibrary.h" -#include "omptarget.h" - #include #include #include @@ -65,6 +64,7 @@ typedef int32_t (*register_lib_ty)(__tgt_bin_desc *); typedef int32_t(supports_empty_images_ty)(); typedef void(print_device_info_ty)(int32_t); + typedef void(set_interop_info_ty)(omp_interop_val_t *); typedef void(set_info_flag_ty)(uint32_t); typedef int32_t(create_event_ty)(int32_t, void **); typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *); @@ -117,13 +117,13 @@ supports_empty_images_ty *supports_empty_images = nullptr; set_info_flag_ty *set_info_flag = nullptr; print_device_info_ty *print_device_info = nullptr; + set_interop_info_ty *set_interop_info = nullptr; create_event_ty *create_event = nullptr; record_event_ty *record_event = nullptr; wait_event_ty *wait_event = nullptr; sync_event_ty *sync_event = nullptr; destroy_event_ty *destroy_event = nullptr; init_async_info_ty *init_async_info = nullptr; - init_device_into_ty *init_device_info = nullptr; release_async_info_ty *release_async_info = nullptr; // Are there images associated with this RTL. Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp =================================================================== --- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -832,19 +832,6 @@ return (bool)Err; } -int32_t __tgt_rtl_init_device_info(int32_t DeviceId, - __tgt_device_info *DeviceInfo, - const char **ErrStr) { - *ErrStr = ""; - - auto Err = Plugin::get().getDevice(DeviceId).initDeviceInfo(DeviceInfo); - if (Err) - REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n", - DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data()); - - return (bool)Err; -} - #ifdef __cplusplus } #endif Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -2681,4 +2681,39 @@ DeviceInfo().printDeviceInfo(DeviceId, DeviceInfo().HSAAgents[DeviceId]); } +int32_t __tgt_rtl_init_async_info(int32_t DeviceId, + __tgt_async_info **AsyncInfo) { + assert(DeviceInfo().isValidDeviceId(DeviceId) && "device_id is invalid"); + assert(AsyncInfo && "async_info is nullptr"); + + *AsyncInfo = new __tgt_async_info(); + + // Retrieve the queue from per−device based HSA queue scheduler + hsa_queue_t *Queue = DeviceInfo().HSAQueueSchedulers[DeviceId].next(); + (*AsyncInfo)->Queue = Queue; + + return OFFLOAD_SUCCESS; +} + +void __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr) { + assert(InteropPtr && "InteropPtr is nullptr"); + assert(DeviceInfo().isValidDeviceId(InteropPtr->device_id) && + "device_id is invalid"); + assert(&(InteropPtr)->device_info && "device_info_ptr is nullptr"); + + // Set the vendor and backend runtime related information + InteropPtr->vendor_id = amdhsa; + InteropPtr->backend_type_id = omp_interop_backend_type_amdhsa_1; + + // Retrieve device information + __tgt_device_info *DeviceInfoPtr = &(InteropPtr)->device_info; + DeviceInfoPtr->Context = nullptr; + hsa_agent_t &Agent = DeviceInfo().HSAAgents[InteropPtr->device_id]; + if (!DeviceInfoPtr->Device) { + DeviceInfoPtr->Device = &Agent; + } + + return; +} + } // extern "C" Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -1857,16 +1857,24 @@ return DeviceRTL.initAsyncInfo(DeviceId, AsyncInfo); } -int32_t __tgt_rtl_init_device_info(int32_t DeviceId, - __tgt_device_info *DeviceInfoPtr, - const char **ErrStr) { - assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); - assert(DeviceInfoPtr && "device_info_ptr is nullptr"); - - if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) - return OFFLOAD_FAIL; - - return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr); +void __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr) { + assert(InteropPtr && "InteropPtr is nullptr"); + assert(DeviceRTL.isValidDeviceId(InteropPtr->device_id) && + "device_id is invalid"); + assert(&(InteropPtr)->device_info && "device_info_ptr is nullptr"); + + // Set the vendor and backend runtime related information + InteropPtr->vendor_id = cuda; + InteropPtr->backend_type_id = omp_interop_backend_type_cuda_1; + + // Retrieve device information + int32_t DeviceId = InteropPtr->device_id; + if (DeviceRTL.setContext(DeviceId) == OFFLOAD_SUCCESS) { + DeviceRTL.initDeviceInfo(DeviceId, &(InteropPtr)->device_info, + &(InteropPtr)->err_str); + } else { + InteropPtr->err_str = "Unable to set context for CUDA backend"; + } } #ifdef __cplusplus Index: openmp/libomptarget/src/interop.cpp =================================================================== --- openmp/libomptarget/src/interop.cpp +++ openmp/libomptarget/src/interop.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "interop.h" +#include "omptargetplugin.h" #include "private.h" namespace { @@ -53,10 +54,22 @@ return ("hip"); case level_zero: return ("level_zero"); + case amdhsa: + return ("amdhsa"); } return ("unknown"); } +const char *getBackendIdToStr(intptr_t BackendId) { + switch (BackendId) { + case omp_interop_backend_type_cuda_1: + return "cuda backend"; + case omp_interop_backend_type_amdhsa_1: + return "amdhsa backend"; + } + return "unknown backend"; +} + template PropertyTy getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err); @@ -88,6 +101,8 @@ : "device+context"; case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); + case omp_ipr_fr_name: + return getBackendIdToStr(InteropVal.backend_type_id); default: getTypeMismatch(Property, Err); return nullptr; @@ -199,19 +214,27 @@ NoaliasDepList); } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); + DeviceTy &Device = *PM->Devices[DeviceId]; + if (!Device.RTL || !Device.RTL->set_interop_info) { + InteropPtr = omp_interop_none; + + return; + } + + // Create interop value object + InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid, + omp_interop_backend_type_invalid_1); if (!deviceIsReady(DeviceId)) { + // If the coresponding device is not ready yet, the interop object is + // initialized with invalid flag InteropPtr->err_str = "Device not ready!"; + return; } - DeviceTy &Device = *PM->Devices[DeviceId]; - if (!Device.RTL || !Device.RTL->init_device_info || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; - } + // Retrieve the target specific interop value object + Device.RTL->set_interop_info(InteropPtr); + if (InteropType == kmp_interop_type_tasksync) { if (!Device.RTL || !Device.RTL->init_async_info || Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -228,6 +228,8 @@ DynLibrary->getAddressOfSymbol("__tgt_rtl_set_info_flag"); *((void **)&RTL.print_device_info) = DynLibrary->getAddressOfSymbol("__tgt_rtl_print_device_info"); + *((void **)&RTL.set_interop_info) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_set_interop_info"); *((void **)&RTL.create_event) = DynLibrary->getAddressOfSymbol("__tgt_rtl_create_event"); *((void **)&RTL.record_event) = @@ -242,8 +244,6 @@ DynLibrary->getAddressOfSymbol("__tgt_rtl_release_async_info"); *((void **)&RTL.init_async_info) = DynLibrary->getAddressOfSymbol("__tgt_rtl_init_async_info"); - *((void **)&RTL.init_device_info) = - DynLibrary->getAddressOfSymbol("__tgt_rtl_init_device_info"); RTL.LibraryHandler = std::move(DynLibrary); Index: openmp/libomptarget/test/api/omp_interop_amdgpu.c =================================================================== --- /dev/null +++ openmp/libomptarget/test/api/omp_interop_amdgpu.c @@ -0,0 +1,85 @@ +// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm +// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa +// REQUIRES: amdgcn-amd-amdhsa + +#include +#include +#include + +#define N 16384 + +void myVectorSet(int n, double s, double *x) { + for (int i = 0; i < n; ++i) + x[i] = s * (i + 1); +} + +void myVectorCopy(int n, double *x, double *y) { + for (int i = 0; i < n; ++i) + y[i] = x[i]; +} + +void myDscal(int n, double s, double *x) { + for (int i = 0; i < n; ++i) + x[i] = s * x[i]; +} + +int main() { + const double scalar = 2.0; + double x[N], y[N]; + int dev; + + omp_interop_t obj = omp_interop_none; + intptr_t type; + + dev = omp_get_default_device(); + +#pragma omp target nowait depend(out : x[0 : N]) map(from : x[0 : N]) \ + device(dev) + myVectorSet(N, 1.0, x); + +#pragma omp task depend(out : y[0 : N]) + myVectorSet(N, -1.0, y); + +#pragma omp interop init(targetsync : obj) device(dev) depend(in : x[0 : N]) \ + depend(inout : y[0 : N]) // get obj 4 syncing + + int id = (int)omp_get_interop_int(obj, omp_ipr_fr_id, NULL); + char *rt_name = (char *)omp_get_interop_str(obj, omp_ipr_fr_name, NULL); + + if (obj != omp_interop_none && id == omp_ifr_amdhsa) { + printf("OpenMP working with %s runtime to execute async memcpy.\n", + rt_name); + int rc; + omp_get_interop_ptr(obj, omp_ipr_targetsync, &rc); + + if (rc != omp_irc_success) { + fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n", + rt_name, rc); + if (rc == omp_irc_no_value) + fprintf(stderr, "Parameters valide, no meaningful value available."); + exit(1); + } + + myVectorCopy(N, x, y); + + } else { // Execute as OpenMP offload + printf("Notice: Offloading myCopy to perform memcpy.\n"); + +#pragma omp target depend(inout : y[0 : N]) depend(in : x[0 : N]) nowait map( \ + to : x[0 : N]) map(tofrom : y[0 : N]) device(dev) + myVectorCopy(N, x, y); + } + + // This also ensures foreign tasks complete +#pragma omp interop destroy(obj) nowait depend(out : y[0 : N]) + +#pragma omp target depend(inout : x[0 : N]) + myDscal(N, scalar, x); + +#pragma omp taskwait + + printf("(1 : 16384) %f:%f\n", y[0], y[N - 1]); + printf("(2 : 32768) %f:%f\n", x[0], x[N - 1]); + + return 0; +}