diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -13,6 +13,7 @@ #ifndef _OMPTARGET_DEVICE_H #define _OMPTARGET_DEVICE_H +#include #include #include #include @@ -499,8 +500,13 @@ /// Struct for the data required to handle plugins struct PluginManager { - PluginManager(bool UseEventsForAtomicTransfers) - : UseEventsForAtomicTransfers(UseEventsForAtomicTransfers) {} + PluginManager() : RefCount(0) {} + + /// Initialize the resources associated with this object. + void Load(); + + /// De-initialize the resources associated with this object. + void Unload(); /// RTLs identified on the host RTLsTy RTLs; @@ -529,9 +535,21 @@ /// Flag to indicate if we use events to ensure the atomicity of /// map clauses or not. Can be modified with an environment variable. - const bool UseEventsForAtomicTransfers; + bool UseEventsForAtomicTransfers; + + /// Filename to write time tracing results to. + char *ProfileTraceFile; + + /// Reference count for the libomptarget plugin. + std::atomic RefCount; }; extern PluginManager *PM; +/// Initialize the plugin manager. +void initLibomptarget(); + +/// Deinitialize the plugin manager. +void deinitLibomptarget(); + #endif diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h --- a/openmp/libomptarget/include/rtl.h +++ b/openmp/libomptarget/include/rtl.h @@ -171,9 +171,6 @@ // Unregister a shared library from all RTLs. void unregisterLib(__tgt_bin_desc *Desc); - // Mutex-like object to guarantee thread-safety and unique initialization - // (i.e. the library attempts to load the RTLs (plugins) only once). - std::once_flag InitFlag; void loadRTLs(); // not thread-safe private: diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -28,14 +28,18 @@ /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { TIMESCOPE(); - PM->RTLs.registerRequires(Flags); + // FIXME: This breaks unified_shared_memory because there is no guaruntee that + // it will be registered before the PM is initialized. + if (PM) + PM->RTLs.registerRequires(Flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { + initLibomptarget(); + TIMESCOPE(); - std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs); for (auto &RTL : PM->RTLs.AllRTLs) { if (RTL.register_lib) { if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { @@ -48,7 +52,10 @@ //////////////////////////////////////////////////////////////////////////////// /// Initialize all available devices without registering any image -EXTERN void __tgt_init_all_rtls() { PM->RTLs.initAllRTLs(); } +EXTERN void __tgt_init_all_rtls() { + if (PM) + PM->RTLs.initAllRTLs(); +} //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library @@ -62,6 +69,8 @@ } } } + + deinitLibomptarget(); } template diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -39,14 +39,14 @@ /* Remote target */ "libomptarget.rtl.rpc", }; -PluginManager *PM; +PluginManager *PM = nullptr; -static char *ProfileTraceFile = nullptr; +static std::mutex PluginManagerMutex; -__attribute__((constructor(101))) void init() { +void PluginManager::Load() { DP("Init target library!\n"); - bool UseEventsForAtomicTransfers = true; + UseEventsForAtomicTransfers = true; if (const char *ForceAtomicMap = getenv("LIBOMPTARGET_MAP_FORCE_ATOMIC")) { std::string ForceAtomicMapStr(ForceAtomicMap); if (ForceAtomicMapStr == "false" || ForceAtomicMapStr == "FALSE") @@ -58,18 +58,17 @@ ForceAtomicMap); } - PM = new PluginManager(UseEventsForAtomicTransfers); - - ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE"); // TODO: add a configuration option for time granularity + ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE"); if (ProfileTraceFile) timeTraceProfilerInitialize(500 /* us */, "libomptarget"); + + // Dynamically load all the supported runtime plugins. + RTLs.loadRTLs(); } -__attribute__((destructor(101))) void deinit() { +void PluginManager::Unload() { DP("Deinit target library!\n"); - delete PM; - if (ProfileTraceFile) { // TODO: add env var for file output if (auto E = timeTraceProfilerWrite(ProfileTraceFile, "-")) @@ -79,6 +78,30 @@ } } +void initLibomptarget() { + std::scoped_lock PluginLock(PluginManagerMutex); + + if (PM == nullptr) + PM = new PluginManager(); + + PM->RefCount++; + + if (PM->RefCount == 1) + PM->Load(); +} + +void deinitLibomptarget() { + std::scoped_lock PluginLock(PluginManagerMutex); + + if (PM->RefCount == 1) + PM->Unload(); + + PM->RefCount--; + + if (PM->RefCount == 0) + delete PM; +} + void RTLsTy::loadRTLs() { // Parse environment variable OMP_TARGET_OFFLOAD (if set) PM->TargetOffloadPolicy = diff --git a/openmp/libomptarget/test/api/omp_parallel_initialization.c b/openmp/libomptarget/test/api/omp_parallel_initialization.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/api/omp_parallel_initialization.c @@ -0,0 +1,50 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include +#include +#include + +typedef struct { + void *addr; + char *name; + int64_t size; + int32_t flags; + int32_t reserved; +} __tgt_offload_entry; + +typedef struct { + void *ImageStart; + void *ImageEnd; + __tgt_offload_entry *EntriesBegin; + __tgt_offload_entry *EntriesEnd; +} __tgt_device_image; + +typedef struct { + int32_t NumDeviceImages; + __tgt_device_image *DeviceImages; + __tgt_offload_entry *HostEntriesBegin; + __tgt_offload_entry *HostEntriesEnd; +} __tgt_bin_desc; + +extern void __tgt_register_lib(__tgt_bin_desc *); +extern void __tgt_unregister_lib(__tgt_bin_desc *); + +void *foo; + +int main() { + void *Img = (void *)""; + __tgt_offload_entry Entries = {foo, "foo", 0, 0, 0}; + __tgt_device_image Image = {Img, Img, &Entries, &Entries + 1}; + __tgt_bin_desc BinDesc = {1, &Image, &Entries, &Entries + 1}; + + const uint32_t NumThreads = 8; + +#pragma omp parallel num_threads(NumThreads) + __tgt_register_lib(&BinDesc); + +#pragma omp parallel num_threads(NumThreads) + __tgt_unregister_lib(&BinDesc); + + // CHECK: PASS + printf("PASS\n"); +}