diff --git a/openmp/libomptarget/include/dlwrap.h b/openmp/libomptarget/include/dlwrap.h
--- a/openmp/libomptarget/include/dlwrap.h
+++ b/openmp/libomptarget/include/dlwrap.h
@@ -71,14 +71,18 @@
 // DLWRAP_INTERNAL is similar, except the function it expands to is:
 //   static int dlwrap_foo(char x0, double x1) { ... }
 // so that the function pointer call can be wrapped in library-specific code
+//
+// DLWRAP_INITIALIZE() declares these static functions in namespace dlwrap:
+#define DLWRAP_INITIALIZE()                                                    \
+  namespace dlwrap {                                                           \
+  static size_t size(); /* exclusive upper bound of the indices below */       \
+  static const char *symbol(size_t); /* get symbol name in [0, size()) */      \
+  static void **                                                               \
+  pointer(size_t); /* get pointer to function pointer in [0, size()) */        \
+  }
 
-// DLWRAP_FINALIZE() expands to definitions of:
+// DLWRAP_FINALIZE() defines the functions declared by DLWRAP_INITIALIZE()
 #define DLWRAP_FINALIZE() DLWRAP_FINALIZE_IMPL()
-namespace dlwrap {
-static size_t size();
-static const char *symbol(size_t); // get symbol name in [0, size())
-static void **pointer(size_t); // get pointer to function pointer in [0, size())
-} // namespace dlwrap
 
 // Implementation details follow.
 
diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -14,7 +14,7 @@
 #ifndef _OMPTARGETPLUGIN_H_
 #define _OMPTARGETPLUGIN_H_
 
-#include <omptarget.h>
+#include "omptarget.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
--- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
@@ -17,6 +17,8 @@
 
 #include
 
+DLWRAP_INITIALIZE();
+
 DLWRAP_INTERNAL(hsa_init, 0);
 
 DLWRAP(hsa_status_string, 2);
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
@@ -20,6 +20,8 @@
 
 #include
 
+DLWRAP_INITIALIZE();
+
 DLWRAP_INTERNAL(cuInit, 1);
 
 DLWRAP(cuCtxGetDevice, 1);
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -497,12 +497,14 @@
 int32_t DeviceTy::runRegion(void *TgtEntryPtr, void **TgtVarsPtr,
                             ptrdiff_t *TgtOffsets, int32_t TgtVarsSize,
                             AsyncInfoTy &AsyncInfo) {
-  if (!RTL->run_region || !RTL->synchronize)
-    return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
-                           TgtVarsSize);
+  // Use the synchronous entry point unless the plugin provides both the
+  // asynchronous variant and synchronize; run_target_region itself is required.
+  if (!RTL->run_target_region_async || !RTL->synchronize)
+    return RTL->run_target_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
+                                  TgtOffsets, TgtVarsSize);
   else
-    return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
-                                 TgtOffsets, TgtVarsSize, AsyncInfo);
+    return RTL->run_target_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
+                                        TgtOffsets, TgtVarsSize, AsyncInfo);
 }
 
 // Run region on device
@@ -519,14 +521,14 @@
                                 int32_t NumTeams, int32_t ThreadLimit,
                                 uint64_t LoopTripCount,
                                 AsyncInfoTy &AsyncInfo) {
-  if (!RTL->run_team_region_async || !RTL->synchronize)
-    return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
-                                TgtOffsets, TgtVarsSize, NumTeams, ThreadLimit,
-                                LoopTripCount);
+  if (!RTL->run_target_team_region_async || !RTL->synchronize)
+    return RTL->run_target_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
+                                       TgtOffsets, TgtVarsSize, NumTeams,
+                                       ThreadLimit, LoopTripCount);
   else
-    return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
-                                      TgtOffsets, TgtVarsSize, NumTeams,
-                                      ThreadLimit, LoopTripCount, AsyncInfo);
+    return RTL->run_target_team_region_async(
+        RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, TgtVarsSize, NumTeams,
+        ThreadLimit, LoopTripCount, AsyncInfo);
 }
 
 // Whether data can be copied to DstDevice directly
diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -13,7 +13,9 @@
 #ifndef _OMPTARGET_RTL_H
 #define _OMPTARGET_RTL_H
 
+#include "dlwrap.h"
 #include "omptarget.h"
+#include "omptargetplugin.h"
 #include
 #include
 #include
@@ -25,43 +27,41 @@
 struct __tgt_bin_desc;
 
 struct RTLInfoTy {
-  typedef int32_t(is_valid_binary_ty)(void *);
-  typedef int32_t(is_data_exchangable_ty)(int32_t, int32_t);
-  typedef int32_t(number_of_devices_ty)();
-  typedef int32_t(init_device_ty)(int32_t);
+  // NAME_ty is the function-pointer type of the declared __tgt_rtl_NAME.
+#ifdef RTL_MAKE_TYPEDEF
+#error "RTL_MAKE_TYPEDEF already defined"
+#endif
+#define RTL_MAKE_TYPEDEF(SYMBOL)                                               \
+  using SYMBOL##_ty = dlwrap::trait<decltype(&__tgt_rtl_##SYMBOL)>::FunctionType
+  RTL_MAKE_TYPEDEF(is_valid_binary);
+  RTL_MAKE_TYPEDEF(is_data_exchangable);
+  RTL_MAKE_TYPEDEF(number_of_devices);
+  RTL_MAKE_TYPEDEF(init_device);
   typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
-  typedef void *(data_alloc_ty)(int32_t, int64_t, void *, int32_t);
-  typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
-  typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
-                                        __tgt_async_info *);
-  typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
-  typedef int32_t(data_retrieve_async_ty)(int32_t, void *, void *, int64_t,
-                                          __tgt_async_info *);
-  typedef int32_t(data_exchange_ty)(int32_t, void *, int32_t, void *, int64_t);
-  typedef int32_t(data_exchange_async_ty)(int32_t, void *, int32_t, void *,
-                                          int64_t, __tgt_async_info *);
-  typedef int32_t(data_delete_ty)(int32_t, void *);
-  typedef int32_t(run_region_ty)(int32_t, void *, void **, ptrdiff_t *,
-                                 int32_t);
-  typedef int32_t(run_region_async_ty)(int32_t, void *, void **, ptrdiff_t *,
-                                       int32_t, __tgt_async_info *);
-  typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
-                                      int32_t, int32_t, int32_t, uint64_t);
-  typedef int32_t(run_team_region_async_ty)(int32_t, void *, void **,
-                                            ptrdiff_t *, int32_t, int32_t,
-                                            int32_t, uint64_t,
-                                            __tgt_async_info *);
-  typedef int64_t(init_requires_ty)(int64_t);
-  typedef int32_t(synchronize_ty)(int32_t, __tgt_async_info *);
+  RTL_MAKE_TYPEDEF(data_alloc);
+  RTL_MAKE_TYPEDEF(data_submit);
+  RTL_MAKE_TYPEDEF(data_submit_async);
+  RTL_MAKE_TYPEDEF(data_retrieve);
+  RTL_MAKE_TYPEDEF(data_retrieve_async);
+  RTL_MAKE_TYPEDEF(data_exchange);
+  RTL_MAKE_TYPEDEF(data_exchange_async);
+  RTL_MAKE_TYPEDEF(data_delete);
+  RTL_MAKE_TYPEDEF(run_target_region);
+  RTL_MAKE_TYPEDEF(run_target_region_async);
+  RTL_MAKE_TYPEDEF(run_target_team_region);
+  RTL_MAKE_TYPEDEF(run_target_team_region_async);
+  RTL_MAKE_TYPEDEF(init_requires);
+  RTL_MAKE_TYPEDEF(synchronize);
   typedef int32_t (*register_lib_ty)(__tgt_bin_desc *);
-  typedef int32_t(supports_empty_images_ty)();
-  typedef void(print_device_info_ty)(int32_t);
-  typedef void(set_info_flag_ty)(uint32_t);
-  typedef int32_t(create_event_ty)(int32_t, void **);
-  typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *);
-  typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *);
-  typedef int32_t(sync_event_ty)(int32_t, void *);
-  typedef int32_t(destroy_event_ty)(int32_t, void *);
+  RTL_MAKE_TYPEDEF(supports_empty_images);
+  RTL_MAKE_TYPEDEF(print_device_info);
+  RTL_MAKE_TYPEDEF(set_info_flag);
+  RTL_MAKE_TYPEDEF(create_event);
+  RTL_MAKE_TYPEDEF(record_event);
+  RTL_MAKE_TYPEDEF(wait_event);
+  RTL_MAKE_TYPEDEF(sync_event);
+  RTL_MAKE_TYPEDEF(destroy_event);
+#undef RTL_MAKE_TYPEDEF
 
   int32_t Idx = -1; // RTL index, index is the number of devices
                     // of other RTLs that were registered before,
@@ -76,35 +76,35 @@
 #endif
 
   // Functions implemented in the RTL.
-  is_valid_binary_ty *is_valid_binary = nullptr;
-  is_data_exchangable_ty *is_data_exchangable = nullptr;
-  number_of_devices_ty *number_of_devices = nullptr;
-  init_device_ty *init_device = nullptr;
+  is_valid_binary_ty is_valid_binary = nullptr;
+  is_data_exchangable_ty is_data_exchangable = nullptr;
+  number_of_devices_ty number_of_devices = nullptr;
+  init_device_ty init_device = nullptr;
   load_binary_ty *load_binary = nullptr;
-  data_alloc_ty *data_alloc = nullptr;
-  data_submit_ty *data_submit = nullptr;
-  data_submit_async_ty *data_submit_async = nullptr;
-  data_retrieve_ty *data_retrieve = nullptr;
-  data_retrieve_async_ty *data_retrieve_async = nullptr;
-  data_exchange_ty *data_exchange = nullptr;
-  data_exchange_async_ty *data_exchange_async = nullptr;
-  data_delete_ty *data_delete = nullptr;
-  run_region_ty *run_region = nullptr;
-  run_region_async_ty *run_region_async = nullptr;
-  run_team_region_ty *run_team_region = nullptr;
-  run_team_region_async_ty *run_team_region_async = nullptr;
-  init_requires_ty *init_requires = nullptr;
-  synchronize_ty *synchronize = nullptr;
+  data_alloc_ty data_alloc = nullptr;
+  data_submit_ty data_submit = nullptr;
+  data_submit_async_ty data_submit_async = nullptr;
+  data_retrieve_ty data_retrieve = nullptr;
+  data_retrieve_async_ty data_retrieve_async = nullptr;
+  data_exchange_ty data_exchange = nullptr;
+  data_exchange_async_ty data_exchange_async = nullptr;
+  data_delete_ty data_delete = nullptr;
+  run_target_region_ty run_target_region = nullptr;
+  run_target_region_async_ty run_target_region_async = nullptr;
+  run_target_team_region_ty run_target_team_region = nullptr;
+  run_target_team_region_async_ty run_target_team_region_async = nullptr;
+  init_requires_ty init_requires = nullptr;
+  synchronize_ty synchronize = nullptr;
   register_lib_ty register_lib = nullptr;
   register_lib_ty unregister_lib = nullptr;
-  supports_empty_images_ty *supports_empty_images = nullptr;
-  set_info_flag_ty *set_info_flag = nullptr;
-  print_device_info_ty *print_device_info = nullptr;
-  create_event_ty *create_event = nullptr;
-  record_event_ty *record_event = nullptr;
-  wait_event_ty *wait_event = nullptr;
-  sync_event_ty *sync_event = nullptr;
-  destroy_event_ty *destroy_event = nullptr;
+  supports_empty_images_ty supports_empty_images = nullptr;
+  set_info_flag_ty set_info_flag = nullptr;
+  print_device_info_ty print_device_info = nullptr;
+  create_event_ty create_event = nullptr;
+  record_event_ty record_event = nullptr;
+  wait_event_ty wait_event = nullptr;
+  sync_event_ty sync_event = nullptr;
+  destroy_event_ty destroy_event = nullptr;
 
   // Are there images associated with this RTL.
   bool isUsed = false;
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -65,6 +65,15 @@
 #endif
 }
 
+template <typename T> static T CastDlsym(void *Handle, const char *Symbol) {
+  // dlsym returns void*. POSIX allows converting that to a function pointer,
+  // but C++ rejects static_cast for it, so reinterpret_cast (or a C-style
+  // cast) is needed.
+  return reinterpret_cast<T>(dlsym(Handle, Symbol));
+}
+#define TypedDlsym(HANDLE, SYMBOL)                                             \
+  CastDlsym<dlwrap::trait<decltype(&SYMBOL)>::FunctionType>(HANDLE, #SYMBOL)
+
 void RTLsTy::LoadRTLs() {
   // Parse environment variable OMP_TARGET_OFFLOAD (if set)
   PM->TargetOffloadPolicy =
@@ -96,35 +105,30 @@
 
     bool ValidPlugin = true;
 
-    if (!(*((void **)&R.is_valid_binary) =
-              dlsym(dynlib_handle, "__tgt_rtl_is_valid_binary")))
+    if (!(R.is_valid_binary =
+              TypedDlsym(dynlib_handle, __tgt_rtl_is_valid_binary)))
       ValidPlugin = false;
-    if (!(*((void **)&R.number_of_devices) =
-              dlsym(dynlib_handle, "__tgt_rtl_number_of_devices")))
+    if (!(R.number_of_devices =
+              TypedDlsym(dynlib_handle, __tgt_rtl_number_of_devices)))
       ValidPlugin = false;
-    if (!(*((void **)&R.init_device) =
-              dlsym(dynlib_handle, "__tgt_rtl_init_device")))
+    if (!(R.init_device = TypedDlsym(dynlib_handle, __tgt_rtl_init_device)))
       ValidPlugin = false;
     if (!(*((void **)&R.load_binary) =
               dlsym(dynlib_handle, "__tgt_rtl_load_binary")))
       ValidPlugin = false;
-    if (!(*((void **)&R.data_alloc) =
-              dlsym(dynlib_handle, "__tgt_rtl_data_alloc")))
+    if (!(R.data_alloc = TypedDlsym(dynlib_handle, __tgt_rtl_data_alloc)))
       ValidPlugin = false;
-    if (!(*((void **)&R.data_submit) =
-              dlsym(dynlib_handle, "__tgt_rtl_data_submit")))
+    if (!(R.data_submit = TypedDlsym(dynlib_handle, __tgt_rtl_data_submit)))
       ValidPlugin = false;
-    if (!(*((void **)&R.data_retrieve) =
-              dlsym(dynlib_handle, "__tgt_rtl_data_retrieve")))
+    if (!(R.data_retrieve = TypedDlsym(dynlib_handle, __tgt_rtl_data_retrieve)))
       ValidPlugin = false;
-    if (!(*((void **)&R.data_delete) =
-              dlsym(dynlib_handle, "__tgt_rtl_data_delete")))
+    if (!(R.data_delete = TypedDlsym(dynlib_handle, __tgt_rtl_data_delete)))
       ValidPlugin = false;
-    if (!(*((void **)&R.run_region) =
-              dlsym(dynlib_handle, "__tgt_rtl_run_target_region")))
+    if (!(R.run_target_region =
+              TypedDlsym(dynlib_handle, __tgt_rtl_run_target_region)))
       ValidPlugin = false;
-    if (!(*((void **)&R.run_team_region) =
-              dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region")))
+    if (!(R.run_target_team_region =
+              TypedDlsym(dynlib_handle, __tgt_rtl_run_target_team_region)))
       ValidPlugin = false;
 
     // Invalid plugin
@@ -152,41 +156,36 @@
        R.NumberOfDevices);
 
     // Optional functions
-    *((void **)&R.init_requires) =
-        dlsym(dynlib_handle, "__tgt_rtl_init_requires");
-    *((void **)&R.data_submit_async) =
-        dlsym(dynlib_handle, "__tgt_rtl_data_submit_async");
-    *((void **)&R.data_retrieve_async) =
-        dlsym(dynlib_handle, "__tgt_rtl_data_retrieve_async");
-    *((void **)&R.run_region_async) =
-        dlsym(dynlib_handle, "__tgt_rtl_run_target_region_async");
-    *((void **)&R.run_team_region_async) =
-        dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region_async");
-    *((void **)&R.synchronize) = dlsym(dynlib_handle, "__tgt_rtl_synchronize");
-    *((void **)&R.data_exchange) =
-        dlsym(dynlib_handle, "__tgt_rtl_data_exchange");
-    *((void **)&R.data_exchange_async) =
-        dlsym(dynlib_handle, "__tgt_rtl_data_exchange_async");
-    *((void **)&R.is_data_exchangable) =
-        dlsym(dynlib_handle, "__tgt_rtl_is_data_exchangable");
+    R.init_requires = TypedDlsym(dynlib_handle, __tgt_rtl_init_requires);
+    R.data_submit_async =
+        TypedDlsym(dynlib_handle, __tgt_rtl_data_submit_async);
+    R.data_retrieve_async =
+        TypedDlsym(dynlib_handle, __tgt_rtl_data_retrieve_async);
+    R.run_target_region_async =
+        TypedDlsym(dynlib_handle, __tgt_rtl_run_target_region_async);
+    R.run_target_team_region_async =
+        TypedDlsym(dynlib_handle, __tgt_rtl_run_target_team_region_async);
+    R.synchronize = TypedDlsym(dynlib_handle, __tgt_rtl_synchronize);
+    R.data_exchange = TypedDlsym(dynlib_handle, __tgt_rtl_data_exchange);
+    R.data_exchange_async =
+        TypedDlsym(dynlib_handle, __tgt_rtl_data_exchange_async);
+    R.is_data_exchangable =
+        TypedDlsym(dynlib_handle, __tgt_rtl_is_data_exchangable);
     *((void **)&R.register_lib) =
         dlsym(dynlib_handle, "__tgt_rtl_register_lib");
     *((void **)&R.unregister_lib) =
         dlsym(dynlib_handle, "__tgt_rtl_unregister_lib");
-    *((void **)&R.supports_empty_images) =
-        dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images");
-    *((void **)&R.set_info_flag) =
-        dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
-    *((void **)&R.print_device_info) =
-        dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
-    *((void **)&R.create_event) =
-        dlsym(dynlib_handle, "__tgt_rtl_create_event");
-    *((void **)&R.record_event) =
-        dlsym(dynlib_handle, "__tgt_rtl_record_event");
-    *((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event");
-    *((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event");
-    *((void **)&R.destroy_event) =
-        dlsym(dynlib_handle, "__tgt_rtl_destroy_event");
+
+    R.supports_empty_images =
+        TypedDlsym(dynlib_handle, __tgt_rtl_supports_empty_images);
+    R.set_info_flag = TypedDlsym(dynlib_handle, __tgt_rtl_set_info_flag);
+    R.print_device_info =
+        TypedDlsym(dynlib_handle, __tgt_rtl_print_device_info);
+    R.create_event = TypedDlsym(dynlib_handle, __tgt_rtl_create_event);
+    R.record_event = TypedDlsym(dynlib_handle, __tgt_rtl_record_event);
+    R.wait_event = TypedDlsym(dynlib_handle, __tgt_rtl_wait_event);
+    R.sync_event = TypedDlsym(dynlib_handle, __tgt_rtl_sync_event);
+    R.destroy_event = TypedDlsym(dynlib_handle, __tgt_rtl_destroy_event);
   }
 
   DP("RTLs loaded!\n");