diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -214,6 +214,9 @@ }; pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER; +std::unordered_map> + KernelArgPoolMap; + /// Use a single entity to encode a kernel and a set of flags struct KernelTy { llvm::omp::OMPTgtExecModeFlags ExecutionMode; @@ -225,9 +228,7 @@ KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize, int32_t DeviceId, void *CallStackAddr, const char *Name, uint32_t KernargSegmentSize, - hsa_amd_memory_pool_t &KernArgMemoryPool, - std::unordered_map> - &KernelArgPoolMap) + hsa_amd_memory_pool_t &KernArgMemoryPool) : ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize), DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) { DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode); @@ -241,6 +242,10 @@ } }; +/// List that contains all the kernels. +/// FIXME: we may need this to be per device and per library. +std::list KernelsList; + template static hsa_status_t findAgents(Callback CB) { hsa_status_t Err = @@ -455,12 +460,6 @@ int NumberOfDevices = 0; - /// List that contains all the kernels. - /// FIXME: we may need this to be per device and per library. - std::list KernelsList; - std::unordered_map> - KernelArgPoolMap; - // GPU devices std::vector HSAAgents; std::vector HSAQueueSchedulers; // one per gpu @@ -862,6 +861,7 @@ "Unexpected device id!"); FuncGblEntries[DeviceId].emplace_back(); FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); + // KernelArgPoolMap.clear(); E.Entries.clear(); E.Table.EntriesBegin = E.Table.EntriesEnd = 0; } @@ -1117,8 +1117,10 @@ pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER; -static RTLDeviceInfoTy *DeviceInfoState = nullptr; -static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; } +// Putting accesses to DeviceInfo global behind a function call prior +// to changing to use init_plugin/deinit_plugin calls +static RTLDeviceInfoTy DeviceInfoState; +static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; } namespace { @@ -1459,9 +1461,8 @@ KernelArgPool *ArgPool = nullptr; void *KernArg = nullptr; { - auto It = - DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name)); - if (It != DeviceInfo().KernelArgPoolMap.end()) { + auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name)); + if (It != KernelArgPoolMap.end()) { ArgPool = (It->second).get(); } } @@ -1940,20 +1941,6 @@ } extern "C" { - -int32_t __tgt_rtl_init_plugin() { - DeviceInfoState = new RTLDeviceInfoTy; - return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded) - ? OFFLOAD_SUCCESS - : OFFLOAD_FAIL; -} - -int32_t __tgt_rtl_deinit_plugin() { - if (DeviceInfoState) - delete DeviceInfoState; - return OFFLOAD_SUCCESS; -} - int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { return elfMachineIdIsAmdgcn(Image); } @@ -1985,6 +1972,9 @@ return true; } +int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; } + int __tgt_rtl_number_of_devices() { // If the construction failed, no methods are safe to call if (DeviceInfo().ConstructionSucceeded) { @@ -2524,12 +2514,11 @@ } check("Loading computation property", Err); - DeviceInfo().KernelsList.push_back( - KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name, - KernargSegmentSize, DeviceInfo().KernArgPool, - DeviceInfo().KernelArgPoolMap)); + KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId, + CallStackAddr, E->name, KernargSegmentSize, + DeviceInfo().KernArgPool)); __tgt_offload_entry Entry = *E; - Entry.addr = (void *)&DeviceInfo().KernelsList.back(); + Entry.addr = (void *)&KernelsList.back(); DeviceInfo().addOffloadEntry(DeviceId, Entry); DP("Entry point %ld maps to %s\n", E - HostBegin, E->name); } diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -65,23 +65,6 @@ __attribute__((destructor(101))) void deinit() { DP("Deinit target library!\n"); - - for (auto *R : PM->RTLs.UsedRTLs) { - // Plugins can either destroy their local state using global variables - // or attribute(destructor) functions or by implementing deinit_plugin - // The hazard with plugin local destructors is they may be called before - // or after this destructor. If the plugin is destroyed using global - // state before this library finishes calling into it the plugin is - // likely to crash. If good fortune means the plugin outlives this - // library then there may be no crash. - // Using deinit_plugin and no global destructors from the plugin works. - if (R->deinit_plugin) { - if (R->deinit_plugin() != OFFLOAD_SUCCESS) { - DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str()); - } - } - } - delete PM; if (ProfileTraceFile) { @@ -579,6 +562,13 @@ PM->TblMapMtx.unlock(); // TODO: Write some RTL->unload_image(...) function? + for (auto *R : UsedRTLs) { + if (R->deinit_plugin) { + if (R->deinit_plugin() != OFFLOAD_SUCCESS) { + DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str()); + } + } + } DP("Done unregistering library!\n"); }