diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -19,13 +19,13 @@ #include EXTERN int omp_get_num_devices(void) { - RTLsMtx->lock(); - size_t Devices_size = Devices.size(); - RTLsMtx->unlock(); + PM->RTLsMtx.lock(); + size_t DevicesSize = PM->Devices.size(); + PM->RTLsMtx.unlock(); - DP("Call to omp_get_num_devices returning %zd\n", Devices_size); + DP("Call to omp_get_num_devices returning %zd\n", DevicesSize); - return Devices_size; + return DevicesSize; } EXTERN int omp_get_initial_device(void) { @@ -56,7 +56,7 @@ return NULL; } - rc = Devices[device_num].allocData(size); + rc = PM->Devices[device_num].allocData(size); DP("omp_target_alloc returns device ptr " DPxMOD "\n", DPxPTR(rc)); return rc; } @@ -81,7 +81,7 @@ return; } - Devices[device_num].deleteData(device_ptr); + PM->Devices[device_num].deleteData(device_ptr); DP("omp_target_free deallocated device ptr\n"); } @@ -99,16 +99,16 @@ return true; } - RTLsMtx->lock(); - size_t Devices_size = Devices.size(); - RTLsMtx->unlock(); - if (Devices_size <= (size_t)device_num) { + PM->RTLsMtx.lock(); + size_t DevicesSize = PM->Devices.size(); + PM->RTLsMtx.unlock(); + if (DevicesSize <= (size_t)device_num) { DP("Call to omp_target_is_present with invalid device ID, returning " "false\n"); return false; } - DeviceTy& Device = Devices[device_num]; + DeviceTy &Device = PM->Devices[device_num]; bool IsLast; // not used bool IsHostPtr; void *TgtPtr = Device.getTgtPtrBegin(ptr, 0, IsLast, false, IsHostPtr); @@ -117,7 +117,7 @@ // getTgtPtrBegin() function which means that there is no device // corresponding point for ptr. This function should return false // in that situation. 
- if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) + if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) rc = !IsHostPtr; DP("Call to omp_target_is_present returns %d\n", rc); return rc; @@ -157,16 +157,16 @@ rc = OFFLOAD_FAIL; } else if (src_device == omp_get_initial_device()) { DP("copy from host to device\n"); - DeviceTy& DstDev = Devices[dst_device]; + DeviceTy &DstDev = PM->Devices[dst_device]; rc = DstDev.submitData(dstAddr, srcAddr, length, nullptr); } else if (dst_device == omp_get_initial_device()) { DP("copy from device to host\n"); - DeviceTy& SrcDev = Devices[src_device]; + DeviceTy &SrcDev = PM->Devices[src_device]; rc = SrcDev.retrieveData(dstAddr, srcAddr, length, nullptr); } else { DP("copy from device to device\n"); - DeviceTy &SrcDev = Devices[src_device]; - DeviceTy &DstDev = Devices[dst_device]; + DeviceTy &SrcDev = PM->Devices[src_device]; + DeviceTy &DstDev = PM->Devices[dst_device]; // First try to use D2D memcpy which is more efficient. If fails, fall back // to unefficient way. if (SrcDev.isDataExchangable(DstDev)) { @@ -263,7 +263,7 @@ return OFFLOAD_FAIL; } - DeviceTy& Device = Devices[device_num]; + DeviceTy &Device = PM->Devices[device_num]; void *device_addr = (void *)((uint64_t)device_ptr + (uint64_t)device_offset); int rc = Device.associatePtr(host_ptr, device_addr, size); DP("omp_target_associate_ptr returns %d\n", rc); @@ -290,7 +290,7 @@ return OFFLOAD_FAIL; } - DeviceTy& Device = Devices[device_num]; + DeviceTy &Device = PM->Devices[device_num]; int rc = Device.disassociatePtr(host_ptr); DP("omp_target_disassociate_ptr returns %d\n", rc); return rc; diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h --- a/openmp/libomptarget/src/device.h +++ b/openmp/libomptarget/src/device.h @@ -22,6 +22,8 @@ #include #include +#include "rtl.h" + // Forward declarations. 
struct RTLInfoTy; struct __tgt_bin_desc; @@ -29,6 +31,14 @@ struct __tgt_async_info; class MemoryManagerTy; +// enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition +enum kmp_target_offload_kind { + tgt_disabled = 0, + tgt_default = 1, + tgt_mandatory = 2 +}; +typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; + /// Map between host data and target data. struct HostDataToTargetTy { uintptr_t HstPtrBase; // host info. @@ -221,8 +231,31 @@ /// Map between Device ID (i.e. openmp device id) and its DeviceTy. typedef std::vector DevicesTy; -extern DevicesTy Devices; extern bool device_is_ready(int device_num); +/// Struct for the data required to handle plugins +struct PluginManager { + /// RTLs identified on the host + RTLsTy RTLs; + + /// Devices associated with RTLs + DevicesTy Devices; + std::mutex RTLsMtx; ///< For RTLs and Devices + + /// Translation table retrieved from the binary + HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; + std::mutex TrlTblMtx; ///< For Translation Table + + /// Map from ptrs on the host to an entry in the Translation Table + HostPtrToTableMapTy HostPtrToTableMap; + std::mutex TblMapMtx; ///< For HostPtrToTableMap + + // Store target policy (disabled, mandatory, default) + kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; + std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy +}; + +extern PluginManager *PM; + #endif diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -20,9 +20,6 @@ #include #include -/// Map between Device ID (i.e. openmp device id) and its DeviceTy. 
-DevicesTy Devices; - DeviceTy::DeviceTy(const DeviceTy &D) : DeviceID(D.DeviceID), RTL(D.RTL), RTLDeviceID(D.RTLDeviceID), IsInit(D.IsInit), InitFlag(), HasPendingGlobals(D.HasPendingGlobals), @@ -239,7 +236,7 @@ MESSAGE("device mapping required by 'present' map type modifier does not " "exist for host address " DPxMOD " (%" PRId64 " bytes)", DPxPTR(HstPtrBegin), Size); - } else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) { // If unified shared memory is active, implicitly mapped variables that are // not privatized use host address. Any explicitly mapped variables also use @@ -305,7 +302,7 @@ Size, (UpdateRefCount ? " updated" : ""), HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str()); rc = (void *)tp; - } else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { + } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { // If the value isn't found in the mapping and unified shared memory // is on then it means we have stumbled upon a value which we need to // use directly from the host. @@ -335,7 +332,8 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete, bool HasCloseModifier) { - if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) + if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + !HasCloseModifier) return OFFLOAD_SUCCESS; // Check if the pointer is contained in any sub-nodes. int rc; @@ -370,7 +368,7 @@ void DeviceTy::init() { // Make call to init_requires if it exists for this plugin. 
if (RTL->init_requires) - RTL->init_requires(RTLs->RequiresFlags); + RTL->init_requires(PM->RTLs.RequiresFlags); int32_t Ret = RTL->init_device(RTLDeviceID); if (Ret != OFFLOAD_SUCCESS) return; @@ -512,16 +510,16 @@ DP("Checking whether device %d is ready.\n", device_num); // Devices.size() can only change while registering a new // library, so try to acquire the lock of RTLs' mutex. - RTLsMtx->lock(); - size_t Devices_size = Devices.size(); - RTLsMtx->unlock(); - if (Devices_size <= (size_t)device_num) { + PM->RTLsMtx.lock(); + size_t DevicesSize = PM->Devices.size(); + PM->RTLsMtx.unlock(); + if (DevicesSize <= (size_t)device_num) { DP("Device ID %d does not have a matching RTL\n", device_num); return false; } // Get device info - DeviceTy &Device = Devices[device_num]; + DeviceTy &Device = PM->Devices[device_num]; DP("Is the device %d (local ID %d) initialized? %d\n", device_num, Device.RTLDeviceID, Device.IsInit); diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -20,75 +20,73 @@ #include #include -// Store target policy (disabled, mandatory, default) -kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; -std::mutex TargetOffloadMtx; - //////////////////////////////////////////////////////////////////////////////// /// manage the success or failure of a target construct static void HandleDefaultTargetOffload() { - TargetOffloadMtx.lock(); - if (TargetOffloadPolicy == tgt_default) { + PM->TargetOffloadMtx.lock(); + if (PM->TargetOffloadPolicy == tgt_default) { if (omp_get_num_devices() > 0) { DP("Default TARGET OFFLOAD policy is now mandatory " "(devices were found)\n"); - TargetOffloadPolicy = tgt_mandatory; + PM->TargetOffloadPolicy = tgt_mandatory; } else { DP("Default TARGET OFFLOAD policy is now disabled " "(no devices were found)\n"); - TargetOffloadPolicy = tgt_disabled; + PM->TargetOffloadPolicy = tgt_disabled; } } - 
TargetOffloadMtx.unlock(); + PM->TargetOffloadMtx.unlock(); } static int IsOffloadDisabled() { - if (TargetOffloadPolicy == tgt_default) HandleDefaultTargetOffload(); - return TargetOffloadPolicy == tgt_disabled; + if (PM->TargetOffloadPolicy == tgt_default) + HandleDefaultTargetOffload(); + return PM->TargetOffloadPolicy == tgt_disabled; } static void HandleTargetOutcome(bool success) { - switch (TargetOffloadPolicy) { - case tgt_disabled: - if (success) { - FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled"); - } - break; - case tgt_default: - FATAL_MESSAGE0(1, "default offloading policy must be switched to " - "mandatory or disabled"); - break; - case tgt_mandatory: - if (!success) { - if (getInfoLevel() > 1) - for (const auto &Device : Devices) - dumpTargetPointerMappings(Device); - else - FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump host-target" - "pointer maps\n"); - - FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory"); - } - break; + switch (PM->TargetOffloadPolicy) { + case tgt_disabled: + if (success) { + FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled"); + } + break; + case tgt_default: + FATAL_MESSAGE0(1, "default offloading policy must be switched to " + "mandatory or disabled"); + break; + case tgt_mandatory: + if (!success) { + if (getInfoLevel() > 1) + for (const auto &Device : PM->Devices) + dumpTargetPointerMappings(Device); + else + FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump host-target" + "pointer maps\n"); + + FATAL_MESSAGE0( + 1, "failure of target construct while offloading is mandatory"); + } + break; } } //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t flags) { - RTLs->RegisterRequires(flags); + PM->RTLs.RegisterRequires(flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to 
the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { - RTLs->RegisterLib(desc); + PM->RTLs.RegisterLib(desc); } //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { - RTLs->UnregisterLib(desc); + PM->RTLs.UnregisterLib(desc); } /// creates host-to-target data mapping, stores it in the @@ -131,7 +129,7 @@ return; } - DeviceTy &Device = Devices[device_id]; + DeviceTy &Device = PM->Devices[device_id]; #ifdef OMPTARGET_DEBUG for (int i = 0; i < arg_num; ++i) { @@ -188,16 +186,16 @@ device_id = omp_get_default_device(); } - RTLsMtx->lock(); - size_t Devices_size = Devices.size(); - RTLsMtx->unlock(); - if (Devices_size <= (size_t)device_id) { + PM->RTLsMtx.lock(); + size_t DevicesSize = PM->Devices.size(); + PM->RTLsMtx.unlock(); + if (DevicesSize <= (size_t)device_id) { DP("Device ID %" PRId64 " does not have a matching RTL.\n", device_id); HandleTargetOutcome(false); return; } - DeviceTy &Device = Devices[device_id]; + DeviceTy &Device = PM->Devices[device_id]; if (!Device.IsInit) { DP("Uninit device: ignore"); HandleTargetOutcome(false); @@ -262,7 +260,7 @@ return; } - DeviceTy& Device = Devices[device_id]; + DeviceTy &Device = PM->Devices[device_id]; int rc = target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); @@ -439,8 +437,8 @@ DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, loop_tripcount); - TblMapMtx->lock(); - Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), - loop_tripcount); - TblMapMtx->unlock(); + PM->TblMapMtx.lock(); + PM->Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), + loop_tripcount); + PM->TblMapMtx.unlock(); } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ 
b/openmp/libomptarget/src/omptarget.cpp @@ -59,11 +59,11 @@ int rc = OFFLOAD_SUCCESS; Device.PendingGlobalsMtx.lock(); - TrlTblMtx->lock(); - for (HostEntriesBeginToTransTableTy::iterator - ii = HostEntriesBeginToTransTable->begin(); - ii != HostEntriesBeginToTransTable->end(); ++ii) { - TranslationTable *TransTable = &ii->second; + PM->TrlTblMtx.lock(); + for (HostEntriesBeginToTransTableTy::iterator entry_it = + PM->HostEntriesBeginToTransTable.begin(); + entry_it != PM->HostEntriesBeginToTransTable.end(); ++entry_it) { + TranslationTable *TransTable = &entry_it->second; if (TransTable->HostTable.EntriesBegin == TransTable->HostTable.EntriesEnd) { // No host entry so no need to proceed @@ -141,7 +141,7 @@ } Device.DataMapMtx.unlock(); } - TrlTblMtx->unlock(); + PM->TrlTblMtx.unlock(); if (rc != OFFLOAD_SUCCESS) { Device.PendingGlobalsMtx.unlock(); @@ -188,7 +188,7 @@ } // Get device info. - DeviceTy &Device = Devices[device_id]; + DeviceTy &Device = PM->Devices[device_id]; // Check whether global data has been mapped for this device Device.PendingGlobalsMtx.lock(); @@ -368,7 +368,7 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { bool copy = false; - if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || HasCloseModifier) { if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { copy = true; @@ -537,7 +537,7 @@ if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) { bool Always = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; bool CopyMember = false; - if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || HasCloseModifier) { if ((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { @@ -551,7 +551,7 @@ } if ((DelEntry || Always || CopyMember) && - !(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + !(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && TgtPtrBegin == HstPtrBegin)) { DP("Moving 
%" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); @@ -684,7 +684,7 @@ continue; } - if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && TgtPtrBegin == HstPtrBegin) { DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", DPxPTR(HstPtrBegin)); @@ -765,18 +765,19 @@ /// Find the table information in the map or look it up in the translation /// tables. TableMap *getTableMap(void *HostPtr) { - std::lock_guard TblMapLock(*TblMapMtx); - HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap->find(HostPtr); + std::lock_guard TblMapLock(PM->TblMapMtx); + HostPtrToTableMapTy::iterator TableMapIt = + PM->HostPtrToTableMap.find(HostPtr); - if (TableMapIt != HostPtrToTableMap->end()) + if (TableMapIt != PM->HostPtrToTableMap.end()) return &TableMapIt->second; // We don't have a map. So search all the registered libraries. TableMap *TM = nullptr; - std::lock_guard TrlTblLock(*TrlTblMtx); + std::lock_guard TrlTblLock(PM->TrlTblMtx); for (HostEntriesBeginToTransTableTy::iterator Itr = - HostEntriesBeginToTransTable->begin(); - Itr != HostEntriesBeginToTransTable->end(); ++Itr) { + PM->HostEntriesBeginToTransTable.begin(); + Itr != PM->HostEntriesBeginToTransTable.end(); ++Itr) { // get the translation table (which contains all the good info). TranslationTable *TransTable = &Itr->second; // iterate over all the host table entries to see if we can locate the @@ -787,7 +788,7 @@ continue; // we got a match, now fill the HostPtrToTableMap so that we // may avoid this search next time. - TM = &(*HostPtrToTableMap)[HostPtr]; + TM = &(PM->HostPtrToTableMap)[HostPtr]; TM->Table = TransTable; TM->Index = I; return TM; @@ -802,11 +803,11 @@ /// __kmpc_push_target_tripcount in one thread but doing offloading in another /// thread, which might occur when we call task yield. 
uint64_t getLoopTripCount(int64_t DeviceId) { - DeviceTy &Device = Devices[DeviceId]; + DeviceTy &Device = PM->Devices[DeviceId]; uint64_t LoopTripCount = 0; { - std::lock_guard TblMapLock(*TblMapMtx); + std::lock_guard TblMapLock(PM->TblMapMtx); auto I = Device.LoopTripCnt.find(__kmpc_global_thread_num(NULL)); if (I != Device.LoopTripCnt.end()) { LoopTripCount = I->second; @@ -989,7 +990,7 @@ std::vector &TgtOffsets, PrivateArgumentManagerTy &PrivateArgumentManager, __tgt_async_info *AsyncInfo) { - DeviceTy &Device = Devices[DeviceId]; + DeviceTy &Device = PM->Devices[DeviceId]; int Ret = targetDataBegin(Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgMappers, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { @@ -1028,7 +1029,7 @@ DPxPTR(HstPtrVal)); continue; } - if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && TgtPtrBegin == HstPtrBegin) { DP("Unified memory is active, no need to map lambda captured" "variable (" DPxMOD ")\n", @@ -1107,7 +1108,7 @@ int64_t *ArgTypes, void **ArgMappers, PrivateArgumentManagerTy &PrivateArgumentManager, __tgt_async_info *AsyncInfo) { - DeviceTy &Device = Devices[DeviceId]; + DeviceTy &Device = PM->Devices[DeviceId]; // Move data from device. int Ret = targetDataEnd(Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, @@ -1137,7 +1138,7 @@ int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit, int IsTeamConstruct) { - DeviceTy &Device = Devices[DeviceId]; + DeviceTy &Device = PM->Devices[DeviceId]; TableMap *TM = getTableMap(HostPtr); // No map for this host pointer found! @@ -1150,7 +1151,7 @@ // get target table. 
__tgt_target_table *TargetTable = nullptr; { - std::lock_guard TrlTblLock(*TrlTblMtx); + std::lock_guard TrlTblLock(PM->TrlTblMtx); assert(TM->Table->TargetsTable.size() > (size_t)DeviceId && "Not expecting a device ID outside the table's bounds!"); TargetTable = TM->Table->TargetsTable[DeviceId]; diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -40,15 +40,6 @@ extern int CheckDeviceAndCtors(int64_t device_id); -// enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition -enum kmp_target_offload_kind { - tgt_disabled = 0, - tgt_default = 1, - tgt_mandatory = 2 -}; -typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; -extern kmp_target_offload_kind_t TargetOffloadPolicy; - // This structure stores information of a mapped memory region. struct MapComponentInfoTy { void *Base; diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h --- a/openmp/libomptarget/src/rtl.h +++ b/openmp/libomptarget/src/rtl.h @@ -158,8 +158,6 @@ // Unregister a shared library from all RTLs. void UnregisterLib(__tgt_bin_desc *desc); }; -extern RTLsTy *RTLs; -extern std::mutex *RTLsMtx; /// Map between the host entry begin and the translation table. 
Each @@ -177,8 +175,6 @@ }; typedef std::map<__tgt_offload_entry *, TranslationTable> HostEntriesBeginToTransTableTy; -extern HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable; -extern std::mutex *TrlTblMtx; /// Map between the host ptr and a table index struct TableMap { @@ -189,7 +185,5 @@ : Table(table), Index(index) {} }; typedef std::map HostPtrToTableMapTy; -extern HostPtrToTableMapTy *HostPtrToTableMap; -extern std::mutex *TblMapMtx; #endif diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -31,39 +31,23 @@ /* AMDGPU target */ "libomptarget.rtl.amdgpu.so", }; -RTLsTy *RTLs; -std::mutex *RTLsMtx; - -HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable; -std::mutex *TrlTblMtx; - -HostPtrToTableMapTy *HostPtrToTableMap; -std::mutex *TblMapMtx; +PluginManager *PM; __attribute__((constructor(101))) void init() { DP("Init target library!\n"); - RTLs = new RTLsTy(); - RTLsMtx = new std::mutex(); - HostEntriesBeginToTransTable = new HostEntriesBeginToTransTableTy(); - TrlTblMtx = new std::mutex(); - HostPtrToTableMap = new HostPtrToTableMapTy(); - TblMapMtx = new std::mutex(); + PM = new PluginManager(); } __attribute__((destructor(101))) void deinit() { DP("Deinit target library!\n"); - delete RTLs; - delete RTLsMtx; - delete HostEntriesBeginToTransTable; - delete TrlTblMtx; - delete HostPtrToTableMap; - delete TblMapMtx; + delete PM; } void RTLsTy::LoadRTLs() { // Parse environment variable OMP_TARGET_OFFLOAD (if set) - TargetOffloadPolicy = (kmp_target_offload_kind_t) __kmpc_get_target_offload(); - if (TargetOffloadPolicy == tgt_disabled) { + PM->TargetOffloadPolicy = + (kmp_target_offload_kind_t)__kmpc_get_target_offload(); + if (PM->TargetOffloadPolicy == tgt_disabled) { return; } @@ -197,7 +181,7 @@ __tgt_device_image *img, RTLInfoTy *RTL) { for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) { - DeviceTy &Device = Devices[RTL->Idx + i]; 
+ DeviceTy &Device = PM->Devices[RTL->Idx + i]; Device.PendingGlobalsMtx.lock(); Device.HasPendingGlobals = true; for (__tgt_offload_entry *entry = img->EntriesBegin; @@ -266,7 +250,7 @@ // Attempt to load all plugins available in the system. std::call_once(initFlag, &RTLsTy::LoadRTLs, this); - RTLsMtx->lock(); + PM->RTLsMtx.lock(); // Register the images with the RTLs that understand them, if any. for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { // Obtain the image. @@ -290,21 +274,21 @@ if (!R.isUsed) { // Initialize the device information for the RTL we are about to use. DeviceTy device(&R); - size_t start = Devices.size(); - Devices.resize(start + R.NumberOfDevices, device); + size_t Start = PM->Devices.size(); + PM->Devices.resize(Start + R.NumberOfDevices, device); for (int32_t device_id = 0; device_id < R.NumberOfDevices; device_id++) { // global device ID - Devices[start + device_id].DeviceID = start + device_id; + PM->Devices[Start + device_id].DeviceID = Start + device_id; // RTL local device ID - Devices[start + device_id].RTLDeviceID = device_id; + PM->Devices[Start + device_id].RTLDeviceID = device_id; } // Initialize the index of this RTL and save it in the used RTLs. R.Idx = (UsedRTLs.empty()) ? 0 : UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices; - assert((size_t) R.Idx == start && + assert((size_t) R.Idx == Start && "RTL index should equal the number of devices used so far."); R.isUsed = true; UsedRTLs.push_back(&R); @@ -313,22 +297,22 @@ } // Initialize (if necessary) translation table for this library. 
- TrlTblMtx->lock(); - if(!HostEntriesBeginToTransTable->count(desc->HostEntriesBegin)){ - TranslationTable &tt = - (*HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; - tt.HostTable.EntriesBegin = desc->HostEntriesBegin; - tt.HostTable.EntriesEnd = desc->HostEntriesEnd; + PM->TrlTblMtx.lock(); + if (!PM->HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)) { + TranslationTable &TransTable = + (PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; + TransTable.HostTable.EntriesBegin = desc->HostEntriesBegin; + TransTable.HostTable.EntriesEnd = desc->HostEntriesEnd; } // Retrieve translation table for this library. TranslationTable &TransTable = - (*HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; + (PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(img->ImageStart), R.RTLName.c_str()); RegisterImageIntoTranslationTable(TransTable, R, img); - TrlTblMtx->unlock(); + PM->TrlTblMtx.unlock(); FoundRTL = &R; // Load ctors/dtors for static objects @@ -342,8 +326,7 @@ DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart)); } } - RTLsMtx->unlock(); - + PM->RTLsMtx.unlock(); DP("Done registering entries!\n"); } @@ -351,7 +334,7 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) { DP("Unloading target library!\n"); - RTLsMtx->lock(); + PM->RTLsMtx.lock(); // Find which RTL understands each image, if any. for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { // Obtain the image. @@ -379,7 +362,7 @@ // Execute dtors for static objects if the device has been used, i.e. // if its PendingCtors list has been emptied. 
for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) { - DeviceTy &Device = Devices[FoundRTL->Idx + i]; + DeviceTy &Device = PM->Devices[FoundRTL->Idx + i]; Device.PendingGlobalsMtx.lock(); if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { @@ -407,28 +390,28 @@ DPxPTR(img->ImageStart)); } } - RTLsMtx->unlock(); + PM->RTLsMtx.unlock(); DP("Done unregistering images!\n"); - // Remove entries from HostPtrToTableMap - TblMapMtx->lock(); + // Remove entries from PM->HostPtrToTableMap + PM->TblMapMtx.lock(); for (__tgt_offload_entry *cur = desc->HostEntriesBegin; cur < desc->HostEntriesEnd; ++cur) { - HostPtrToTableMap->erase(cur->addr); + PM->HostPtrToTableMap.erase(cur->addr); } // Remove translation table for this descriptor. - auto tt = HostEntriesBeginToTransTable->find(desc->HostEntriesBegin); - if (tt != HostEntriesBeginToTransTable->end()) { + auto TransTable = PM->HostEntriesBeginToTransTable.find(desc->HostEntriesBegin); + if (TransTable != PM->HostEntriesBeginToTransTable.end()) { DP("Removing translation table for descriptor " DPxMOD "\n", DPxPTR(desc->HostEntriesBegin)); - HostEntriesBeginToTransTable->erase(tt); + PM->HostEntriesBeginToTransTable.erase(TransTable); } else { DP("Translation table for descriptor " DPxMOD " cannot be found, probably " "it has been already removed.\n", DPxPTR(desc->HostEntriesBegin)); } - TblMapMtx->unlock(); + PM->TblMapMtx.unlock(); // TODO: Remove RTL and the devices it manages if it's not used anymore? // TODO: Write some RTL->unload_image(...) function?