diff --git a/openmp/libomptarget/include/interop.h b/openmp/libomptarget/include/interop.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/include/interop.h @@ -0,0 +1,181 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _INTEROP_H_ +#define _INTEROP_H_ + +#include "omptarget.h" +#include + +#if defined(_WIN32) +#define __KAI_KMPC_CONVENTION __cdecl +#ifndef __KMP_IMP +#define __KMP_IMP __declspec(dllimport) +#endif +#else +#define __KAI_KMPC_CONVENTION +#ifndef __KMP_IMP +#define __KMP_IMP +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/// TODO: Include the `omp.h` of the current build +/* OpenMP 5.1 interop */ +typedef intptr_t omp_intptr_t; + +/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined + * properties */ +typedef enum omp_interop_property { + omp_ipr_fr_id = -1, + omp_ipr_fr_name = -2, + omp_ipr_vendor = -3, + omp_ipr_vendor_name = -4, + omp_ipr_device_num = -5, + omp_ipr_platform = -6, + omp_ipr_device = -7, + omp_ipr_device_context = -8, + omp_ipr_targetsync = -9, + omp_ipr_first = -9 +} omp_interop_property_t; + +#define omp_interop_none 0 + +typedef enum omp_interop_rc { + omp_irc_no_value = 1, + omp_irc_success = 0, + omp_irc_empty = -1, + omp_irc_out_of_range = -2, + omp_irc_type_int = -3, + omp_irc_type_ptr = -4, + omp_irc_type_str = -5, + omp_irc_other = -6 +} omp_interop_rc_t; + +typedef enum omp_interop_fr { + omp_ifr_cuda = 1, + omp_ifr_cuda_driver = 2, + omp_ifr_opencl = 3, + omp_ifr_sycl = 4, + omp_ifr_hip = 5, + omp_ifr_level_zero = 6, + omp_ifr_last = 7 +} omp_interop_fr_t; + +typedef void *omp_interop_t; + +/*! + * The `omp_get_num_interop_properties` routine retrieves the number of + * implementation-defined properties available for an `omp_interop_t` object. + */ +int __KAI_KMPC_CONVENTION omp_get_num_interop_properties(const omp_interop_t); +/*! + * The `omp_get_interop_int` routine retrieves an integer property from an + * `omp_interop_t` object. + */ +omp_intptr_t __KAI_KMPC_CONVENTION omp_get_interop_int(const omp_interop_t, + omp_interop_property_t, + int *); +/*! + * The `omp_get_interop_ptr` routine retrieves a pointer property from an + * `omp_interop_t` object. + */ +void *__KAI_KMPC_CONVENTION omp_get_interop_ptr(const omp_interop_t, + omp_interop_property_t, int *); +/*! + * The `omp_get_interop_str` routine retrieves a string property from an + * `omp_interop_t` object. + */ +const char *__KAI_KMPC_CONVENTION omp_get_interop_str(const omp_interop_t, + omp_interop_property_t, + int *); +/*! + * The `omp_get_interop_name` routine retrieves a property name from an + * `omp_interop_t` object. + */ +const char *__KAI_KMPC_CONVENTION omp_get_interop_name(const omp_interop_t, + omp_interop_property_t); +/*! + * The `omp_get_interop_type_desc` routine retrieves a description of the type + * of a property associated with an `omp_interop_t` object. + */ +const char *__KAI_KMPC_CONVENTION +omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); +/*! + * The `omp_get_interop_rc_desc` routine retrieves a description of the return + * code associated with an `omp_interop_t` object. + */ +extern const char *__KAI_KMPC_CONVENTION +omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); + +typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ + /* Compiler flags */ /* Total compiler flags must be 16 bits */ + unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ + unsigned final : 1; /* task is final(1) so execute immediately */ + unsigned merged_if0 : 1; // no __kmpc_task_{begin/complete}_if0 calls in if0 + unsigned destructors_thunk : 1; // set if the compiler creates a thunk to + unsigned proxy : 1; // task is a proxy task (it will be executed outside the + unsigned priority_specified : 1; // set if the compiler provides priority + unsigned detachable : 1; // 1 == can detach */ + unsigned unshackled : 1; /* 1 == unshackled task */ + unsigned target : 1; /* 1 == target task */ + unsigned reserved : 7; /* reserved for compiler use */ + unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ + unsigned task_serial : 1; // task is executed immediately (1) or deferred (0) + unsigned tasking_ser : 1; // all tasks in team are either executed immediately + unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel + unsigned started : 1; /* 1==started, 0==not started */ + unsigned executing : 1; /* 1==executing, 0==not executing */ + unsigned complete : 1; /* 1==complete, 0==not complete */ + unsigned freed : 1; /* 1==freed, 0==allocated */ + unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ + unsigned reserved31 : 7; /* reserved for library use */ +} kmp_tasking_flags_t; + +typedef enum omp_interop_backend_type_t { + // reserve 0 + omp_interop_backend_type_cuda_1 = 1, +} omp_interop_backend_type_t; + +typedef enum kmp_interop_type_t { + kmp_interop_type_unknown = -1, + kmp_interop_type_platform, + kmp_interop_type_device, + kmp_interop_type_tasksync, +} kmp_interop_type_t; + +typedef enum omp_foreign_runtime_ids { + cuda = 1, + cuda_driver = 2, + opencl = 3, + sycl = 4, + hip = 5, + level_zero = 6, +} omp_foreign_runtime_ids_t; + +/// The interop value type, aka. the interop object. +typedef struct omp_interop_val_t { + /// Device and interop-type are determined at construction time and fix. + omp_interop_val_t(intptr_t device_id, kmp_interop_type_t interop_type) + : interop_type(interop_type), device_id(device_id) {} + const char *err_str = nullptr; + __tgt_async_info *async_info = nullptr; + __tgt_device_info device_info; + const kmp_interop_type_t interop_type; + const intptr_t device_id; + const omp_foreign_runtime_ids_t vendor_id = cuda; + const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; +} omp_interop_val_t; + +#ifdef __cplusplus +} +#endif +#endif diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -176,6 +176,11 @@ uint64_t Stride; }; +struct __tgt_device_info { + void *Context = nullptr; + void *Device = nullptr; +}; + #ifdef __cplusplus extern "C" { #endif diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h --- a/openmp/libomptarget/include/omptargetplugin.h +++ b/openmp/libomptarget/include/omptargetplugin.h @@ -142,6 +142,10 @@ // Set plugin's internal information flag externally. void __tgt_rtl_set_info_flag(uint32_t); +int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr); +int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr, + const char **errStr); + #ifdef __cplusplus } #endif diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -421,6 +421,7 @@ E.Table.EntriesBegin = E.Table.EntriesEnd = nullptr; } +public: CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfo) const { assert(AsyncInfo && "AsyncInfo is nullptr"); @@ -430,7 +431,6 @@ return reinterpret_cast(AsyncInfo->Queue); } -public: // This class should not be copied DeviceRTLTy(const DeviceRTLTy &) = delete; DeviceRTLTy(DeviceRTLTy &&) = delete; @@ -1118,6 +1118,45 @@ } return (Err == CUDA_SUCCESS) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL; } + + int releaseAsyncInfo(int DeviceId, __tgt_async_info *AsyncInfo) const { + if (AsyncInfo->Queue) { + StreamManager->returnStream(DeviceId, + reinterpret_cast(AsyncInfo->Queue)); + AsyncInfo->Queue = nullptr; + } + + return OFFLOAD_SUCCESS; + } + + int initAsyncInfo(int DeviceId, __tgt_async_info **AsyncInfo) const { + CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); + if (!checkResult(Err, "error returned from cuCtxSetCurrent")) + return OFFLOAD_FAIL; + + *AsyncInfo = new __tgt_async_info; + getStream(DeviceId, *AsyncInfo); + return OFFLOAD_SUCCESS; + } + + int initDeviceInfo(const int DeviceId, __tgt_device_info *DeviceInfo, + const char **errStr) const { + assert(DeviceInfo && "DeviceInfo is nullptr"); + + if (!DeviceInfo->Context) + DeviceInfo->Context = DeviceData[DeviceId].Context; + if (!DeviceInfo->Device) { + CUdevice Dev; + CUresult Err = cuDeviceGet(&Dev, DeviceId); + if (Err == CUDA_SUCCESS) { + DeviceInfo->Device = reinterpret_cast(Dev); + } else { + cuGetErrorString(Err, errStr); + return OFFLOAD_FAIL; + } + } + return OFFLOAD_SUCCESS; + } }; DeviceRTLTy DeviceRTL; @@ -1313,11 +1352,36 @@ return DeviceRTL.synchronize(device_id, async_info_ptr); } +int32_t __tgt_rtl_release_async_info(int32_t device_id, + __tgt_async_info *async_info) { + assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); + assert(async_info && "async_info is nullptr"); + + return DeviceRTL.releaseAsyncInfo(device_id, async_info); +} + +int32_t __tgt_rtl_init_async_info(int32_t device_id, + __tgt_async_info **async_info) { + assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); + assert(async_info && "async_info is nullptr"); + + return DeviceRTL.initAsyncInfo(device_id, async_info); +} + void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) { std::atomic &InfoLevel = getInfoLevelInternal(); InfoLevel.store(NewInfoLevel); } +int32_t __tgt_rtl_init_device_info(int32_t device_id, + __tgt_device_info *device_info_ptr, + const char **errStr) { + assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); + assert(device_info_ptr && "device_info_ptr is nullptr"); + + return DeviceRTL.initDeviceInfo(device_id, device_info_ptr, errStr); +} + #ifdef __cplusplus } #endif diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports --- a/openmp/libomptarget/plugins/exports +++ b/openmp/libomptarget/plugins/exports @@ -23,6 +23,8 @@ __tgt_rtl_unregister_lib; __tgt_rtl_supports_empty_images; __tgt_rtl_set_info_flag; + __tgt_rtl_init_device_info; + __tgt_rtl_init_async_info; local: *; }; diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt --- a/openmp/libomptarget/src/CMakeLists.txt +++ b/openmp/libomptarget/src/CMakeLists.txt @@ -16,8 +16,9 @@ ${CMAKE_CURRENT_SOURCE_DIR}/api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/rtl.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/interop.cpp ${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/rtl.cpp ) set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE) diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -40,6 +40,15 @@ llvm_omp_target_alloc_shared; llvm_omp_target_alloc_device; __tgt_set_info_flag; + omp_get_interop_ptr; + omp_get_interop_str; + omp_get_interop_int; + omp_get_interop_name; + omp_get_interop_type_desc; + omp_get_interop_rc_desc; + __tgt_interop_init; + __tgt_interop_use; + __tgt_interop_destroy; local: *; }; diff --git a/openmp/libomptarget/src/interop.cpp b/openmp/libomptarget/src/interop.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/src/interop.cpp @@ -0,0 +1,285 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "interop.h" +#include "private.h" + +namespace { +omp_interop_rc_t getPropertyErrorType(omp_interop_property_t Property) { + switch (Property) { + case omp_ipr_fr_id: + return omp_irc_type_int; + case omp_ipr_fr_name: + return omp_irc_type_str; + case omp_ipr_vendor: + return omp_irc_type_int; + case omp_ipr_vendor_name: + return omp_irc_type_str; + case omp_ipr_device_num: + return omp_irc_type_int; + case omp_ipr_platform: + return omp_irc_type_int; + case omp_ipr_device: + return omp_irc_type_ptr; + case omp_ipr_device_context: + return omp_irc_type_ptr; + case omp_ipr_targetsync: + return omp_irc_type_ptr; + }; + return omp_irc_no_value; +} + +void getTypeMismatch(omp_interop_property_t Property, int *Err) { + if (Err) + *Err = getPropertyErrorType(Property); +} + +const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { + switch (VendorId) { + case cuda: + return ("cuda"); + case cuda_driver: + return ("cuda_driver"); + case opencl: + return ("opencl"); + case sycl: + return ("sycl"); + case hip: + return ("hip"); + case level_zero: + return ("level_zero"); + } + return ("unknown"); +} + +template +PropertyTy getProperty(omp_interop_val_t &InteropVal, + omp_interop_property_t Property, int *Err); + +template <> +intptr_t getProperty(omp_interop_val_t &interop_val, + omp_interop_property_t property, int *err) { + switch (property) { + case omp_ipr_fr_id: + return interop_val.backend_type_id; + case omp_ipr_vendor: + return interop_val.vendor_id; + case omp_ipr_device_num: + return interop_val.device_id; + default:; + } + getTypeMismatch(property, err); + return 0; +} + +template <> +const char *getProperty(omp_interop_val_t &interop_val, + omp_interop_property_t property, + int *err) { + switch (property) { + case omp_ipr_fr_id: + return interop_val.interop_type == kmp_interop_type_tasksync + ? "tasksync" + : "device+context"; + case omp_ipr_vendor_name: + return getVendorIdToStr(interop_val.vendor_id); + default: + getTypeMismatch(property, err); + return nullptr; + } +} + +template <> +void *getProperty(omp_interop_val_t &interop_val, + omp_interop_property_t property, int *err) { + switch (property) { + case omp_ipr_device: + if (interop_val.device_info.Device) + return interop_val.device_info.Device; + *err = omp_irc_no_value; + return const_cast(interop_val.err_str); + case omp_ipr_device_context: + return interop_val.device_info.Context; + case omp_ipr_targetsync: + return interop_val.async_info->Queue; + default:; + } + getTypeMismatch(property, err); + return nullptr; +} + +bool getPropertyCheck(omp_interop_val_t **interop_ptr, + omp_interop_property_t property, int *err) { + if (err) + *err = omp_irc_success; + if (!interop_ptr) { + if (err) + *err = omp_irc_empty; + return false; + } + if (property >= 0 || property < omp_ipr_first) { + if (err) + *err = omp_irc_out_of_range; + return false; + } + if (property == omp_ipr_targetsync && + (*interop_ptr)->interop_type != kmp_interop_type_tasksync) { + if (err) + *err = omp_irc_other; + return false; + } + if ((property == omp_ipr_device || property == omp_ipr_device_context) && + (*interop_ptr)->interop_type == kmp_interop_type_tasksync) { + if (err) + *err = omp_irc_other; + return false; + } + return true; +} + +} // namespace + +#define __OMP_GET_INTEROP_TY(RETURN_TYPE, SUFFIX) \ + RETURN_TYPE omp_get_interop_##SUFFIX(const omp_interop_t interop, \ + omp_interop_property_t property_id, \ + int *err) { \ + omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ + assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + if (!getPropertyCheck(&interop_val, property_id, err)) { \ + return (RETURN_TYPE)(0); \ + } \ + return getProperty(*interop_val, property_id, err); \ + } +__OMP_GET_INTEROP_TY(intptr_t, int) +__OMP_GET_INTEROP_TY(void *, ptr) +__OMP_GET_INTEROP_TY(const char *, str) +#undef __OMP_GET_INTEROP_TY + +#define __OMP_GET_INTEROP_TY3(RETURN_TYPE, SUFFIX) \ + RETURN_TYPE omp_get_interop_##SUFFIX(const omp_interop_t interop, \ + omp_interop_property_t property_id) { \ + int err; \ + omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ + if (!getPropertyCheck(&interop_val, property_id, &err)) { \ + return (RETURN_TYPE)(0); \ + } \ + return nullptr; \ + return getProperty(*interop_val, property_id, &err); \ + } +__OMP_GET_INTEROP_TY3(const char *, name) +__OMP_GET_INTEROP_TY3(const char *, type_desc) +__OMP_GET_INTEROP_TY3(const char *, rc_desc) +#undef __OMP_GET_INTEROP_TY3 + +typedef int64_t kmp_int64; + +#ifdef __cplusplus +extern "C" { +#endif +void __tgt_interop_init(ident_t *loc_ref, kmp_int32 gtid, + omp_interop_val_t *&interop_ptr, + kmp_interop_type_t interop_type, kmp_int32 device_id, + kmp_int64 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 have_nowait) { + kmp_int32 ndeps_noalias = 0; + kmp_depend_info_t *noalias_dep_list = NULL; + assert(interop_type != kmp_interop_type_unknown && + "Cannot initialize with unknown interop_type!"); + if (device_id == -1) { + device_id = omp_get_default_device(); + } + + if (interop_type == kmp_interop_type_tasksync) { + __kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, + noalias_dep_list); + } + + interop_ptr = new omp_interop_val_t(device_id, interop_type); + if (!device_is_ready(device_id)) { + interop_ptr->err_str = "Device not ready!"; + return; + } + + DeviceTy &Device = PM->Devices[device_id]; + if (!Device.RTL || !Device.RTL->init_device_info || + Device.RTL->init_device_info(device_id, &(interop_ptr)->device_info, + &(interop_ptr)->err_str)) { + delete interop_ptr; + interop_ptr = omp_interop_none; + } + if (interop_type == kmp_interop_type_tasksync) { + if (!Device.RTL || !Device.RTL->init_async_info || + Device.RTL->init_async_info(device_id, &(interop_ptr)->async_info)) { + delete interop_ptr; + interop_ptr = omp_interop_none; + } + } + // TODO Flush the queue associated with the interop through the plugin + // TODO Signal out dependences +} + +void __tgt_interop_use(ident_t *loc_ref, kmp_int32 gtid, + omp_interop_val_t *&interop_ptr, kmp_int32 device_id, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 have_nowait) { + kmp_int32 ndeps_noalias = 0; + kmp_depend_info_t *noalias_dep_list = NULL; + assert(interop_ptr && "Cannot use nullptr!"); + omp_interop_val_t *interop_val = interop_ptr; + if (device_id == -1) { + device_id = omp_get_default_device(); + } + assert(interop_val != omp_interop_none && + "Cannot use uninitialized interop_ptr!"); + assert((device_id == -1 || interop_val->device_id == device_id) && + "Inconsistent device-id usage!"); + + if (!device_is_ready(device_id)) { + interop_ptr->err_str = "Device not ready!"; + return; + } + + if (interop_val->interop_type == kmp_interop_type_tasksync) { + __kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, + noalias_dep_list); + } +} + +void __tgt_interop_destroy(ident_t *loc_ref, kmp_int32 gtid, + omp_interop_val_t *&interop_ptr, kmp_int32 device_id, + kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_int32 have_nowait) { + kmp_int32 ndeps_noalias = 0; + kmp_depend_info_t *noalias_dep_list = NULL; + assert(interop_ptr && "Cannot use nullptr!"); + omp_interop_val_t *interop_val = interop_ptr; + if (device_id == -1) { + device_id = omp_get_default_device(); + } + + if (interop_val == omp_interop_none) + return; + + assert((device_id == -1 || interop_val->device_id == device_id) && + "Inconsistent device-id usage!"); + if (!device_is_ready(device_id)) { + interop_ptr->err_str = "Device not ready!"; + return; + } + + if (interop_val->interop_type == kmp_interop_type_tasksync) { + __kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, + noalias_dep_list); + } + + delete interop_ptr; + interop_ptr = omp_interop_none; +} +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -89,11 +89,35 @@ #ifdef __cplusplus extern "C" { #endif + +/*! + * The ident structure that describes a source location. + * The struct is identical to the one in the kmp.h file. + * We maintain the same data structure for compatibility. + */ +typedef int kmp_int32; +typedef intptr_t kmp_intptr_t; + +// Compiler sends us this info: +typedef struct kmp_depend_info { + kmp_intptr_t base_addr; + size_t len; + struct { + bool in : 1; + bool out : 1; + bool mtx : 1; + } flags; +} kmp_depend_info_t; + // functions that extract info from libomp; keep in sync int omp_get_default_device(void) __attribute__((weak)); int32_t __kmpc_omp_taskwait(void *loc_ref, int32_t gtid) __attribute__((weak)); int32_t __kmpc_global_thread_num(void *) __attribute__((weak)); int __kmpc_get_target_offload(void) __attribute__((weak)); +void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, + kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, + kmp_depend_info_t *noalias_dep_list) + __attribute__((weak)); #ifdef __cplusplus } #endif diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h --- a/openmp/libomptarget/src/rtl.h +++ b/openmp/libomptarget/src/rtl.h @@ -56,6 +56,10 @@ typedef int32_t (*register_lib_ty)(__tgt_bin_desc *); typedef int32_t(supports_empty_images_ty)(); typedef void(set_info_flag_ty)(uint32_t); + typedef int32_t(release_async_info_ty)(int32_t, __tgt_async_info *); + typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **); + typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *, + const char **); int32_t Idx = -1; // RTL index, index is the number of devices // of other RTLs that were registered before, @@ -93,6 +97,9 @@ register_lib_ty unregister_lib = nullptr; supports_empty_images_ty *supports_empty_images = nullptr; set_info_flag_ty *set_info_flag = nullptr; + init_async_info_ty *init_async_info = nullptr; + init_device_into_ty *init_device_info = nullptr; + release_async_info_ty *release_async_info = nullptr; // Are there images associated with this RTL. bool isUsed = false; diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -177,6 +177,12 @@ dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images"); *((void **)&R.set_info_flag) = dlsym(dynlib_handle, "__tgt_rtl_set_info_flag"); + *((void **)&R.release_async_info) = + dlsym(dynlib_handle, "__tgt_rtl_release_async_info"); + *((void **)&R.init_async_info) = + dlsym(dynlib_handle, "__tgt_rtl_init_async_info"); + *((void **)&R.init_device_info) = + dlsym(dynlib_handle, "__tgt_rtl_init_device_info"); } DP("RTLs loaded!\n"); diff --git a/openmp/libomptarget/test/offloading/interop.c b/openmp/libomptarget/test/offloading/interop.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/offloading/interop.c @@ -0,0 +1,48 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: nvptx64-nvidia-cuda + +#include +#include +#include +#include +#include + +typedef void *cudaStream_t; + +int main() { + + int device_id = omp_get_default_device(); + +#pragma omp parallel master + { + + double D0, D2; + omp_interop_t interop; + +#pragma omp interop init(targetsync : interop) device(device_id) nowait + assert(interop != NULL); + + int err; + for (int i = omp_ipr_first; i < 0; i++) { + const char *n = + omp_get_interop_name(interop, (omp_interop_property_t)(i)); + long int li = + omp_get_interop_int(interop, (omp_interop_property_t)(i), &err); + const void *p = + omp_get_interop_ptr(interop, (omp_interop_property_t)(i), &err); + const char *s = + omp_get_interop_str(interop, (omp_interop_property_t)(i), &err); + const char *n1 = + omp_get_interop_type_desc(interop, (omp_interop_property_t)(i)); + } +#pragma omp interop use(interop) depend(in : D0, D2) + + cudaStream_t stream = + (omp_get_interop_ptr(interop, omp_ipr_targetsync, NULL)); + assert(stream != NULL); + +#pragma omp interop destroy(interop) depend(in : D0, D2) device(device_id) + } + printf("PASS\n"); +} +// CHECK: PASS diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -548,6 +548,9 @@ omp_display_env 733 omp_calloc 776 omp_realloc 777 + omp_get_interop_int 2514 + omp_get_interop_ptr 2515 + omp_get_interop_str 2516 omp_null_allocator DATA omp_default_mem_alloc DATA diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -1446,6 +1446,120 @@ #endif } +/// TODO: Include the `omp.h` of the current build +/* OpenMP 5.1 interop */ +typedef intptr_t omp_intptr_t; + +/* 0..omp_get_num_interop_properties()-1 are reserved for implementation-defined + * properties */ +typedef enum omp_interop_property { + omp_ipr_fr_id = -1, + omp_ipr_fr_name = -2, + omp_ipr_vendor = -3, + omp_ipr_vendor_name = -4, + omp_ipr_device_num = -5, + omp_ipr_platform = -6, + omp_ipr_device = -7, + omp_ipr_device_context = -8, + omp_ipr_targetsync = -9, + omp_ipr_first = -9 +} omp_interop_property_t; + +#define omp_interop_none 0 + +typedef enum omp_interop_rc { + omp_irc_no_value = 1, + omp_irc_success = 0, + omp_irc_empty = -1, + omp_irc_out_of_range = -2, + omp_irc_type_int = -3, + omp_irc_type_ptr = -4, + omp_irc_type_str = -5, + omp_irc_other = -6 +} omp_interop_rc_t; + +typedef enum omp_interop_fr { + omp_ifr_cuda = 1, + omp_ifr_cuda_driver = 2, + omp_ifr_opencl = 3, + omp_ifr_sycl = 4, + omp_ifr_hip = 5, + omp_ifr_level_zero = 6, + omp_ifr_last = 7 +} omp_interop_fr_t; + +typedef void *omp_interop_t; + +// libomptarget, if loaded, provides this function +int FTN_STDCALL FTN_GET_NUM_INTEROP_PROPERTIES(const omp_interop_t interop) { +#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) + return 0; +#else + int (*fptr)(const omp_interop_t); + if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_num_interop_properties"))) + return (*fptr)(interop); + return 0; +#endif // KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB) +} + +/// TODO Convert FTN_GET_INTEROP_XXX functions into a macro like interop.cpp +// libomptarget, if loaded, provides this function +intptr_t FTN_STDCALL FTN_GET_INTEROP_INT(const omp_interop_t interop, + omp_interop_property_t property_id, + int *err) { + intptr_t (*fptr)(const omp_interop_t, omp_interop_property_t, int *); + if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_int"))) + return (*fptr)(interop, property_id, err); + return 0; +} + +// libomptarget, if loaded, provides this function +void *FTN_STDCALL FTN_GET_INTEROP_PTR(const omp_interop_t interop, + omp_interop_property_t property_id, + int *err) { + void *(*fptr)(const omp_interop_t, omp_interop_property_t, int *); + if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_ptr"))) + return (*fptr)(interop, property_id, err); + return nullptr; +} + +// libomptarget, if loaded, provides this function +const char *FTN_STDCALL FTN_GET_INTEROP_STR(const omp_interop_t interop, + omp_interop_property_t property_id, + int *err) { + const char *(*fptr)(const omp_interop_t, omp_interop_property_t, int *); + if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_str"))) + return (*fptr)(interop, property_id, err); + return nullptr; +} + +// libomptarget, if loaded, provides this function +const char *FTN_STDCALL FTN_GET_INTEROP_NAME( + const omp_interop_t interop, omp_interop_property_t property_id) { + const char *(*fptr)(const omp_interop_t, omp_interop_property_t); + if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_name"))) + return (*fptr)(interop, property_id); + return nullptr; +} + +// libomptarget, if loaded, provides this function +const char *FTN_STDCALL FTN_GET_INTEROP_TYPE_DESC( + const omp_interop_t interop, omp_interop_property_t property_id) { + const char *(*fptr)(const omp_interop_t, omp_interop_property_t); + if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_type_desc"))) + return (*fptr)(interop, property_id); + return nullptr; +} + +// libomptarget, if loaded, provides this function +const char *FTN_STDCALL FTN_GET_INTEROP_RC_DESC( + const omp_interop_t interop, omp_interop_property_t property_id) { + const char *(*fptr)(const omp_interop_t, omp_interop_property_t); + if ((*(void **)(&fptr) = KMP_DLSYM_NEXT("omp_get_interop_rec_desc"))) + return (*fptr)(interop, property_id); + return nullptr; +} + // display environment variables when requested void FTN_STDCALL FTN_DISPLAY_ENV(int verbose) { #ifndef KMP_STUB diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h --- a/openmp/runtime/src/kmp_ftn_os.h +++ b/openmp/runtime/src/kmp_ftn_os.h @@ -140,6 +140,14 @@ #define FTN_SET_TEAMS_THREAD_LIMIT omp_set_teams_thread_limit #define FTN_GET_TEAMS_THREAD_LIMIT omp_get_teams_thread_limit +#define FTN_GET_NUM_INTEROP_PROPERTIES omp_get_num_interop_properties +#define FTN_GET_INTEROP_INT omp_get_interop_int +#define FTN_GET_INTEROP_PTR omp_get_interop_ptr +#define FTN_GET_INTEROP_STR omp_get_interop_str +#define FTN_GET_INTEROP_NAME omp_get_interop_name +#define FTN_GET_INTEROP_TYPE_DESC omp_get_interop_type_desc +#define FTN_GET_INTEROP_RC_DESC omp_get_interop_rc_desc + #endif /* KMP_FTN_PLAIN */ /* ------------------------------------------------------------------------ */