diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -82,6 +82,11 @@
 /// adds requires flags
 EXTERN void __tgt_register_requires(int64_t flags) {
   TIMESCOPE();
+  // Forward the flags to every RTL in UsedRTLs that exports the optional
+  // __tgt_rtl_register_requires hook (the pointer stays null otherwise).
+  for (auto &RTL : PM->RTLs.UsedRTLs)
+    if (RTL->register_requires)
+      RTL->register_requires(flags);
   PM->RTLs.RegisterRequires(flags);
 }
 
@@ -89,6 +94,14 @@
 /// adds a target shared library to the target execution image
 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
   TIMESCOPE();
+  // One-time plugin load, moved here from RTLsTy::RegisterLib so that it
+  // runs before the loop below walks AllRTLs.
+  std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs);
+  // Hand the descriptor to every RTL in AllRTLs that exports the optional
+  // __tgt_rtl_register_lib hook, ahead of the regular registration.
+  for (auto &RTL : PM->RTLs.AllRTLs)
+    if (RTL.register_lib)
+      RTL.register_lib(desc);
   PM->RTLs.RegisterLib(desc);
 }
 
@@ -97,6 +110,11 @@
 EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
   TIMESCOPE();
   PM->RTLs.UnregisterLib(desc);
+  // After the regular unregistration, notify every RTL in UsedRTLs that
+  // exports the optional __tgt_rtl_unregister_lib hook.
+  for (auto &RTL : PM->RTLs.UsedRTLs)
+    if (RTL->unregister_lib)
+      RTL->unregister_lib(desc);
 }
 
 /// creates host-to-target data mapping, stores it in the
diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -53,6 +53,9 @@
                                             __tgt_async_info *);
   typedef int64_t(init_requires_ty)(int64_t);
   typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *);
+  // Signatures of the library (un)registration and requires-flags hooks.
+  typedef void(register_lib_ty)(__tgt_bin_desc *);
+  typedef void(register_requires_ty)(int64_t);
 
   int32_t Idx = -1;             // RTL index, index is the number of devices
                                 // of other RTLs that were registered before,
@@ -86,6 +89,10 @@
   run_team_region_async_ty *run_team_region_async = nullptr;
   init_requires_ty *init_requires = nullptr;
   synchronize_ty *synchronize = nullptr;
+  // Optional hooks; callers must null-check them before calling.
+  register_lib_ty *register_lib = nullptr;
+  register_lib_ty *unregister_lib = nullptr;
+  register_requires_ty *register_requires = nullptr;
 
   // Are there images associated with this RTL.
   bool isUsed = false;
@@ -126,18 +133,14 @@
     init_requires = r.init_requires;
     isUsed = r.isUsed;
     synchronize = r.synchronize;
+    register_lib = r.register_lib;
+    unregister_lib = r.unregister_lib;
+    register_requires = r.register_requires;
   }
 };
 
 /// RTLs identified in the system.
-class RTLsTy {
-private:
-  // Mutex-like object to guarantee thread-safety and unique initialization
-  // (i.e. the library attempts to load the RTLs (plugins) only once).
-  std::once_flag initFlag;
-  void LoadRTLs(); // not thread-safe
-
-public:
+struct RTLsTy {
   // List of the detected runtime libraries.
   std::list<RTLInfoTy> AllRTLs;
 
@@ -157,8 +160,12 @@
 
   // Unregister a shared library from all RTLs.
   void UnregisterLib(__tgt_bin_desc *desc);
-};
 
+  // Mutex-like object to guarantee thread-safety and unique initialization
+  // (i.e. the library attempts to load the RTLs (plugins) only once).
+  std::once_flag initFlag;
+  void LoadRTLs(); // not thread-safe
+};
 
 /// Map between the host entry begin and the translation table. Each
 /// registered library gets one TranslationTable. Use the map from
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -23,6 +23,7 @@
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
+    /* Remote target */ "libomptarget.rtl.rpc.so",
     /* PowerPC target */ "libomptarget.rtl.ppc64.so",
     /* x86_64 target */ "libomptarget.rtl.x86_64.so",
     /* CUDA target */ "libomptarget.rtl.cuda.so",
@@ -148,6 +149,14 @@
     *((void **)&R.is_data_exchangable) =
         dlsym(dynlib_handle, "__tgt_rtl_is_data_exchangable");
 
+    // Optional hooks: dlsym yields null when the plugin lacks the symbol.
+    *((void **)&R.register_lib) =
+        dlsym(dynlib_handle, "__tgt_rtl_register_lib");
+    *((void **)&R.unregister_lib) =
+        dlsym(dynlib_handle, "__tgt_rtl_unregister_lib");
+    *((void **)&R.register_requires) =
+        dlsym(dynlib_handle, "__tgt_rtl_register_requires");
+
     // No devices are supported by this RTL?
     if (!(R.NumberOfDevices = R.number_of_devices())) {
       DP("No devices supported in this RTL\n");
@@ -268,9 +277,6 @@
 }
 
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
-  // Attempt to load all plugins available in the system.
-  std::call_once(initFlag, &RTLsTy::LoadRTLs, this);
-
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {