Index: openmp/libomptarget/include/rtl.h =================================================================== --- openmp/libomptarget/include/rtl.h +++ openmp/libomptarget/include/rtl.h @@ -15,7 +15,7 @@ #include "omptarget.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/DynamicLibrary.h" #include "omptarget.h" @@ -24,6 +24,8 @@ #include #include #include +#include +#include // Forward declarations. struct DeviceTy; @@ -176,7 +178,27 @@ std::once_flag InitFlag; void loadRTLs(); // not thread-safe + void registerToAllRTLs(__tgt_bin_desc* Desc); + + // `loadRTLs` `dlopen`s the runtime plugins, which might load other libraries finally + // trying to load another library recursively. + // When this happens delay the loading of the libraries until the outermost call finishes, + // to ensure that the plugins are initialized before any library is loaded. + // returns true if the registration was delayed and the caller should skip registration + bool delayRegistration(__tgt_bin_desc* DelayedDesc); + + // Libraries might also be unregistered during `loadRTLs`, + // when this happens just cancel their delayed registration. + // returns true if the registration was delayed and the caller should skip de-registration + bool cancelDelayedRegistration(__tgt_bin_desc* DelayedDesc); + private: + // The thread that is currently loading the RTLs + std::atomic LoadingThread; + llvm::SmallPtrSet<__tgt_bin_desc*, 1> DelayedDescs; + + void runDelayedRegistrations(); + static bool attemptLoadRTL(const std::string &RTLName, RTLInfoTy &RTL); }; Index: openmp/libomptarget/src/interface.cpp =================================================================== --- openmp/libomptarget/src/interface.cpp +++ openmp/libomptarget/src/interface.cpp @@ -35,14 +35,12 @@ /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { TIMESCOPE(); + + if(PM->RTLs.delayRegistration(Desc)) + return; + std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs); - for (auto &RTL : PM->RTLs.AllRTLs) { - if (RTL.register_lib) { - if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { - DP("Could not register library with %s", RTL.RTLName.c_str()); - } - } - } + PM->RTLs.registerToAllRTLs(Desc); PM->RTLs.registerLib(Desc); } @@ -54,6 +52,10 @@ /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { TIMESCOPE(); + + if(PM->RTLs.cancelDelayedRegistration(Desc)) + return; + PM->RTLs.unregisterLib(Desc); for (auto &RTL : PM->RTLs.UsedRTLs) { if (RTL->unregister_lib) { Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -12,17 +12,21 @@ #include "llvm/Object/OffloadBinary.h" +#include "llvm/ADT/ScopeExit.h" + #include "device.h" #include "private.h" #include "rtl.h" #include "Utilities.h" +#include #include #include #include #include #include +#include using namespace llvm; using namespace llvm::sys; @@ -80,6 +84,12 @@ } void RTLsTy::loadRTLs() { + LoadingThread.store(std::this_thread::get_id()); + const auto ResetOnExit = llvm::make_scope_exit([this] { + LoadingThread.store(std::thread::id{}); + runDelayedRegistrations(); + }); + // Parse environment variable OMP_TARGET_OFFLOAD (if set) PM->TargetOffloadPolicy = (kmp_target_offload_kind_t)__kmpc_get_target_offload(); @@ -113,6 +123,41 @@ DP("RTLs loaded!\n"); } +void RTLsTy::registerToAllRTLs(__tgt_bin_desc* Desc) { + for (auto &RTL : PM->RTLs.AllRTLs) { + if (RTL.register_lib) { + if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { + DP("Could not register library with %s", RTL.RTLName.c_str()); + } + } + } +} + +bool RTLsTy::delayRegistration(__tgt_bin_desc* DelayedDesc) { + if(LoadingThread.load() != std::this_thread::get_id()) + return false; + + DelayedDescs.insert(DelayedDesc); + return true; +} + +bool RTLsTy::cancelDelayedRegistration(__tgt_bin_desc* DelayedDesc) { + if(LoadingThread.load() != std::this_thread::get_id()) + return false; + + DelayedDescs.erase(DelayedDesc); + return true; +} + +void RTLsTy::runDelayedRegistrations() { + for (auto* Desc: DelayedDescs) { + registerToAllRTLs(Desc); + registerLib(Desc); + } + + DelayedDescs.clear(); +} + bool RTLsTy::attemptLoadRTL(const std::string &RTLName, RTLInfoTy &RTL) { const char *Name = RTLName.c_str(); Index: openmp/libomptarget/test/offloading/dynamic_module_multiple.c =================================================================== --- /dev/null +++ openmp/libomptarget/test/offloading/dynamic_module_multiple.c @@ -0,0 +1,21 @@ +// RUN: %libomptarget-compile-generic -DFOO -fPIC -shared -o %t-foo.so && \ +// RUN: %libomptarget-compile-generic -DBAR -fPIC -shared -o %t-bar.so && \ +// RUN: %libomptarget-compile-generic %t-foo.so %t-bar.so && \ +// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic + +#if defined(FOO) +void foo() {} +#elif defined(BAR) +void bar() {} +#else + +#include +int main() { +#pragma omp target + ; + // CHECK: DONE. + printf("%s\n", "DONE."); + return 0; +} + +#endif