diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -530,6 +530,37 @@
   /// Flag to indicate if we use events to ensure the atomicity of
   /// map clauses or not. Can be modified with an environment variable.
   const bool UseEventsForAtomicTransfers;
+
+  /// Workaround for plugins that call dlopen on shared libraries that in turn
+  /// call __tgt_register_lib during their own initialisation. While the
+  /// plugins are still being initialised, stash \p Desc for later
+  /// registration and return true; once they are loaded, return false so the
+  /// caller performs the registration itself.
+  bool maybeDelayRegisterLib(__tgt_bin_desc *Desc) {
+    if (RTLsLoaded)
+      return false;
+
+    // Only reachable from libomptarget constructor
+    DelayedBinDesc.push_back(Desc);
+    return true;
+  }
+
+  /// Mark the plugins as loaded and register every descriptor stashed by
+  /// maybeDelayRegisterLib, in the order in which they were stashed.
+  void registerDelayedLibraries() {
+    // Only called by libomptarget constructor
+    // Set the flag first so the calls below register instead of re-stashing.
+    RTLsLoaded = true;
+    for (auto *Desc : DelayedBinDesc)
+      __tgt_register_lib(Desc);
+    DelayedBinDesc.clear();
+  }
+
+private:
+  /// Set to true by registerDelayedLibraries.
+  bool RTLsLoaded = false;
+  /// Descriptors whose registration was deferred by maybeDelayRegisterLib.
+  llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc;
 };
 
 extern PluginManager *PM;
diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -171,10 +171,8 @@
   // Unregister a shared library from all RTLs.
   void unregisterLib(__tgt_bin_desc *Desc);
 
-  // Mutex-like object to guarantee thread-safety and unique initialization
-  // (i.e. the library attempts to load the RTLs (plugins) only once).
-  std::once_flag InitFlag;
-  void loadRTLs(); // not thread-safe
+  // not thread-safe, called from global constructor (i.e. once)
+  void loadRTLs();
 
 private:
   static bool attemptLoadRTL(const std::string &RTLName, RTLInfoTy &RTL);
diff --git a/openmp/libomptarget/plugins/remote/server/Server.cpp b/openmp/libomptarget/plugins/remote/server/Server.cpp
--- a/openmp/libomptarget/plugins/remote/server/Server.cpp
+++ b/openmp/libomptarget/plugins/remote/server/Server.cpp
@@ -90,8 +90,6 @@
 Status RemoteOffloadImpl::GetNumberOfDevices(ServerContext *Context,
                                              const Null *Null,
                                              I32 *NumberOfDevices) {
-  std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
-
   int32_t Devices = 0;
   PM->RTLsMtx.lock();
   for (auto &RTL : PM->RTLs.AllRTLs)
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -35,7 +35,9 @@
 /// adds a target shared library to the target execution image
 EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
   TIMESCOPE();
-  std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs);
+  if (PM->maybeDelayRegisterLib(Desc))
+    return;
+
   for (auto &RTL : PM->RTLs.AllRTLs) {
     if (RTL.register_lib) {
       if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) {
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -64,6 +64,9 @@
   // TODO: add a configuration option for time granularity
   if (ProfileTraceFile)
     timeTraceProfilerInitialize(500 /* us */, "libomptarget");
+
+  PM->RTLs.loadRTLs();
+  PM->registerDelayedLibraries();
 }
 
 __attribute__((destructor(101))) void deinit() {