Index: openmp/libomptarget/include/interop.h =================================================================== --- openmp/libomptarget/include/interop.h +++ openmp/libomptarget/include/interop.h @@ -143,6 +143,10 @@ typedef enum omp_interop_backend_type_t { // reserve 0 omp_interop_backend_type_cuda_1 = 1, + + omp_interop_backend_type_hip_1 = 5, + + omp_interop_backend_type_invalid_1 = 10 } omp_interop_backend_type_t; typedef enum kmp_interop_type_t { @@ -153,6 +157,7 @@ } kmp_interop_type_t; typedef enum omp_foreign_runtime_ids { + invalid = 0, cuda = 1, cuda_driver = 2, opencl = 3, @@ -164,15 +169,18 @@ /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. - omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type) - : interop_type(interop_type), device_id(device_id) {} + omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type, + omp_foreign_runtime_ids_t vendor_id, + intptr_t backend_type_id) + : interop_type(interop_type), device_id(device_id), vendor_id(vendor_id), + backend_type_id(backend_type_id) {} const char *err_str = nullptr; __tgt_async_info *async_info = nullptr; __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_foreign_runtime_ids_t vendor_id; + intptr_t backend_type_id; } omp_interop_val_t; #ifdef __cplusplus Index: openmp/libomptarget/include/omptarget.h =================================================================== --- openmp/libomptarget/include/omptarget.h +++ openmp/libomptarget/include/omptarget.h @@ -350,6 +350,7 @@ void __tgt_set_info_flag(uint32_t); int __tgt_print_device_info(int64_t DeviceId); + #ifdef __cplusplus } #endif Index: openmp/libomptarget/include/omptargetplugin.h =================================================================== --- openmp/libomptarget/include/omptargetplugin.h +++ openmp/libomptarget/include/omptargetplugin.h @@ -14,6 +14,7 @@ #ifndef _OMPTARGETPLUGIN_H_ #define _OMPTARGETPLUGIN_H_ +#include #include #ifdef __cplusplus @@ -162,6 +163,9 @@ // Print the device information void __tgt_rtl_print_device_info(int32_t ID); +// Set the runtime related information for interop object +void __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr); + // Event related interfaces. It is expected to use the interfaces in the // following way: // 1) Create an event on the target device (__tgt_rtl_create_event). Index: openmp/libomptarget/include/rtl.h =================================================================== --- openmp/libomptarget/include/rtl.h +++ openmp/libomptarget/include/rtl.h @@ -14,12 +14,11 @@ #define _OMPTARGET_RTL_H #include "omptarget.h" +#include "interop.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DynamicLibrary.h" -#include "omptarget.h" - #include #include #include @@ -65,6 +64,7 @@ typedef int32_t (*register_lib_ty)(__tgt_bin_desc *); typedef int32_t(supports_empty_images_ty)(); typedef void(print_device_info_ty)(int32_t); + typedef void(set_interop_info_ty)(omp_interop_val_t *); typedef void(set_info_flag_ty)(uint32_t); typedef int32_t(create_event_ty)(int32_t, void **); typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *); @@ -117,6 +117,7 @@ supports_empty_images_ty *supports_empty_images = nullptr; set_info_flag_ty *set_info_flag = nullptr; print_device_info_ty *print_device_info = nullptr; + set_interop_info_ty *set_interop_info = nullptr; create_event_ty *create_event = nullptr; record_event_ty *record_event = nullptr; wait_event_ty *wait_event = nullptr; Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -2681,4 +2681,71 @@ DeviceInfo().printDeviceInfo(DeviceId, DeviceInfo().HSAAgents[DeviceId]); } +int32_t __tgt_rtl_init_async_info(int32_t DeviceId, + __tgt_async_info **AsyncInfo) { + assert(DeviceInfo().isValidDeviceId(DeviceId) && "device_id is invalid"); + assert(AsyncInfo && "async_info is nullptr"); + + *AsyncInfo = new __tgt_async_info(); + if (DeviceInfo().HSAQueueSchedulers.size() <= DeviceId) { + // If the cooresponding device is not available, then set the error code to + // Queue field + (*AsyncInfo)->Queue = nullptr; + + return OFFLOAD_FAIL; + } + + // Retrieve the queue from per−device based HSA queue scheduler + hsa_queue_t *Queue = DeviceInfo().HSAQueueSchedulers[DeviceId].next(); + if (Queue != nullptr) { + (*AsyncInfo)->Queue = Queue; + + return OFFLOAD_SUCCESS; + } + + // If the queue object is not available , then set the error code to Queue + // field + (*AsyncInfo)->Queue = nullptr; + + return OFFLOAD_FAIL; +} + +int32_t __tgt_rtl_init_device_info(int32_t DeviceId, + __tgt_device_info *DeviceInfoPtr, + const char **ErrStr) { + assert(DeviceInfo().isValidDeviceId(DeviceId) && "device_id is invalid"); + assert(DeviceInfoPtr && "device_info_ptr is nullptr"); + + DeviceInfoPtr->Context = nullptr; + if (DeviceInfo().HSAAgents.size() <= DeviceId) { + // If the cooresponding device is not available, then set device as null + DeviceInfoPtr->Device = nullptr; + *ErrStr = "Device ID is invalid"; + + return OFFLOAD_FAIL; + } + + // Retrieve the HSA agent from per−device based HSA agent list + hsa_agent_t &Agent = DeviceInfo().HSAAgents[DeviceId]; + if (!DeviceInfoPtr->Device) { + DeviceInfoPtr->Device = &Agent; + } + + return OFFLOAD_SUCCESS; +} + +void __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr) { + assert(InteropPtr && "InteropPtr is nullptr"); + + // Set the vendor and backend runtime related information + InteropPtr->vendor_id = hip; + InteropPtr->backend_type_id = omp_interop_backend_type_hip_1; + + // Retrieve device information + __tgt_rtl_init_device_info(InteropPtr->device_id, &(InteropPtr)->device_info, + &(InteropPtr)->err_str); + + return; +} + } // extern "C" Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -1863,12 +1863,28 @@ assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); assert(DeviceInfoPtr && "device_info_ptr is nullptr"); - if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) { + *ErrStr = "Unable to set context for CUDA backend"; return OFFLOAD_FAIL; + } return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr); } +void __tgt_rtl_set_interop_info(omp_interop_val_t *InteropPtr) { + assert(InteropPtr && "InteropPtr is nullptr"); + + // Set the vendor and backend runtime related information + InteropPtr->vendor_id = cuda; + InteropPtr->backend_type_id = omp_interop_backend_type_cuda_1; + + // Retrieve device information + __tgt_rtl_init_device_info(InteropPtr->device_id, &(InteropPtr)->device_info, + &(InteropPtr)->err_str); + + return; +} + #ifdef __cplusplus } #endif Index: openmp/libomptarget/src/interop.cpp =================================================================== --- openmp/libomptarget/src/interop.cpp +++ openmp/libomptarget/src/interop.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "interop.h" +#include "omptargetplugin.h" #include "private.h" namespace { @@ -57,6 +58,16 @@ return ("unknown"); } +const char *getBackendIdToStr(intptr_t BackendId) { + switch (BackendId) { + case omp_interop_backend_type_cuda_1: + return ("cuda backend"); + case omp_interop_backend_type_hip_1: + return ("hip backend"); + } + return ("unknown backend"); +} + template PropertyTy getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err); @@ -88,6 +99,8 @@ : "device+context"; case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); + case omp_ipr_fr_name: + return getBackendIdToStr(InteropVal.backend_type_id); default: getTypeMismatch(Property, Err); return nullptr; @@ -199,19 +212,27 @@ NoaliasDepList); } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); + DeviceTy &Device = *PM->Devices[DeviceId]; + if (!Device.RTL || !Device.RTL->set_interop_info) { + InteropPtr = omp_interop_none; + + return; + } + + // Create interop value object + InteropPtr = new omp_interop_val_t(DeviceId, InteropType, invalid, + omp_interop_backend_type_invalid_1); if (!deviceIsReady(DeviceId)) { + // If the coresponding device is not ready yet, the interop object is + // initialized with invalid flag InteropPtr->err_str = "Device not ready!"; + return; } - DeviceTy &Device = *PM->Devices[DeviceId]; - if (!Device.RTL || !Device.RTL->init_device_info || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; - } + // Retrieve the target specific interop value object + Device.RTL->set_interop_info(InteropPtr); + if (InteropType == kmp_interop_type_tasksync) { if (!Device.RTL || !Device.RTL->init_async_info || Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -228,6 +228,8 @@ DynLibrary->getAddressOfSymbol("__tgt_rtl_set_info_flag"); *((void **)&RTL.print_device_info) = DynLibrary->getAddressOfSymbol("__tgt_rtl_print_device_info"); + *((void **)&RTL.set_interop_info) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_set_interop_info"); *((void **)&RTL.create_event) = DynLibrary->getAddressOfSymbol("__tgt_rtl_create_event"); *((void **)&RTL.record_event) = Index: openmp/libomptarget/test/api/omp_interop_amdgpu.c =================================================================== --- /dev/null +++ openmp/libomptarget/test/api/omp_interop_amdgpu.c @@ -0,0 +1,77 @@ +// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O1 -mllvm +// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa +// REQUIRES: amdgcn-amd-amdhsa + +#include +#include +#include + +#define N 16384 + +void myVectorSet(int n, double s, double *x) { + for (int i = 0; i < n; ++ i) x[i] = s * (i + 1); +} + +void myVectorCopy(int n, double *x, double *y) { + for (int i = 0; i < n; ++ i) y[i] = x[i]; +} + +void myDscal(int n, double s, double *x) { + for (int i = 0; i < n; ++ i) x[i] = s* x[i]; +} + +int main () { + const double scalar = 2.0; + double x[N], y[N]; + int dev; + + omp_interop_t obj = omp_interop_none; + intptr_t type; + + dev = omp_get_default_device(); + +#pragma omp target nowait depend(out: x[0 : N]) map(from: x[0 : N]) device(dev) + myVectorSet(N, 1.0, x); + +#pragma omp task depend(out: y[0 : N]) + myVectorSet(N , -1.0, y); + +#pragma omp interop init(targetsync: obj) device(dev) depend(in: x[0 : N]) depend(inout: y[0 : N]) // get obj 4 syncing + + int id = (int)omp_get_interop_int(obj, omp_ipr_fr_id, NULL); + char* rt_name = (char*)omp_get_interop_str(obj, omp_ipr_fr_name, NULL); + + if (obj != omp_interop_none && id == omp_ifr_hip) { + printf("OpenMP working with %s runtime to execute async memcpy.\n", rt_name); + int rc; + omp_get_interop_ptr(obj, omp_ipr_targetsync, &rc); + + if (rc != omp_irc_success) { + fprintf(stderr, "ERROR: Failed to get %s stream, rt error = %d.\n", rt_name, rc); + if (rc == omp_irc_no_value) + fprintf(stderr, "Parameters valide, no meaningful value available."); + exit(1); + } + + myVectorCopy(N, x, y); + + } else { // Execute as OpenMP offload + printf("Notice: Offloading myCopy to perform memcpy.\n"); + +#pragma omp target depend(inout: y[0 : N]) depend(in: x[0 : N]) nowait map(to: x[0 : N]) map(tofrom: y[0 : N]) device(dev) + myVectorCopy(N, x, y); + } + + // This also ensures foreign tasks complete +#pragma omp interop destroy(obj) nowait depend(out: y[0 : N]) + +#pragma omp target depend(inout: x[0 : N]) + myDscal(N, scalar, x); + +#pragma omp taskwait + + printf("(1 : 16384) %f:%f\n", y[0], y[N-1]); + printf("(2 : 32768) %f:%f\n", x[0], x[N-1]); + + return 0; +}