diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -21,9 +21,9 @@ #include EXTERN int omp_get_num_devices(void) { - RTLsMtx.lock(); + RTLsMtx->lock(); size_t Devices_size = Devices.size(); - RTLsMtx.unlock(); + RTLsMtx->unlock(); DP("Call to omp_get_num_devices returning %zd\n", Devices_size); @@ -102,9 +102,9 @@ return true; } - RTLsMtx.lock(); + RTLsMtx->lock(); size_t Devices_size = Devices.size(); - RTLsMtx.unlock(); + RTLsMtx->unlock(); if (Devices_size <= (size_t)device_num) { DP("Call to omp_target_is_present with invalid device ID, returning " "false\n"); @@ -120,7 +120,7 @@ // getTgtPtrBegin() function which means that there is no device // corresponding point for ptr. This function should return false // in that situation. - if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) + if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) rc = !IsHostPtr; DP("Call to omp_target_is_present returns %d\n", rc); return rc; diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -189,7 +189,8 @@ // maps are respected. // In addition to the mapping rules above, the close map // modifier forces the mapping of the variable to the device. - if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) { + if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + !HasCloseModifier) { DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n", DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : "")); IsHostPtr = true; @@ -235,7 +236,7 @@ (UpdateRefCount ? " updated" : ""), HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str()); rc = (void *)tp; - } else if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { + } else if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { // If the value isn't found in the mapping and unified shared memory // is on then it means we have stumbled upon a value which we need to // use directly from the host. @@ -265,7 +266,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete, bool HasCloseModifier) { - if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) + if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) return OFFLOAD_SUCCESS; // Check if the pointer is contained in any sub-nodes. int rc; @@ -299,7 +300,7 @@ void DeviceTy::init() { // Make call to init_requires if it exists for this plugin. if (RTL->init_requires) - RTL->init_requires(RTLs.RequiresFlags); + RTL->init_requires(RTLs->RequiresFlags); int32_t rc = RTL->init_device(RTLDeviceID); if (rc == OFFLOAD_SUCCESS) { IsInit = true; @@ -363,9 +364,9 @@ DP("Checking whether device %d is ready.\n", device_num); // Devices.size() can only change while registering a new // library, so try to acquire the lock of RTLs' mutex. - RTLsMtx.lock(); + RTLsMtx->lock(); size_t Devices_size = Devices.size(); - RTLsMtx.unlock(); + RTLsMtx->unlock(); if (Devices_size <= (size_t)device_num) { DP("Device ID %d does not have a matching RTL\n", device_num); return false; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -71,19 +71,19 @@ //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t flags) { - RTLs.RegisterRequires(flags); + RTLs->RegisterRequires(flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { - RTLs.RegisterLib(desc); + RTLs->RegisterLib(desc); } //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { - RTLs.UnregisterLib(desc); + RTLs->UnregisterLib(desc); } /// creates host-to-target data mapping, stores it in the @@ -147,9 +147,9 @@ device_id = omp_get_default_device(); } - RTLsMtx.lock(); + RTLsMtx->lock(); size_t Devices_size = Devices.size(); - RTLsMtx.unlock(); + RTLsMtx->unlock(); if (Devices_size <= (size_t)device_id) { DP("Device ID %" PRId64 " does not have a matching RTL.\n", device_id); HandleTargetOutcome(false); @@ -343,8 +343,8 @@ DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id, loop_tripcount); - TblMapMtx.lock(); + TblMapMtx->lock(); Devices[device_id].LoopTripCnt.emplace(__kmpc_global_thread_num(NULL), loop_tripcount); - TblMapMtx.unlock(); + TblMapMtx->unlock(); } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -67,10 +67,10 @@ int rc = OFFLOAD_SUCCESS; Device.PendingGlobalsMtx.lock(); - TrlTblMtx.lock(); + TrlTblMtx->lock(); for (HostEntriesBeginToTransTableTy::iterator - ii = HostEntriesBeginToTransTable.begin(); - ii != HostEntriesBeginToTransTable.end(); ++ii) { + ii = HostEntriesBeginToTransTable->begin(); + ii != HostEntriesBeginToTransTable->end(); ++ii) { TranslationTable *TransTable = &ii->second; if (TransTable->HostTable.EntriesBegin == TransTable->HostTable.EntriesEnd) { @@ -149,7 +149,7 @@ } Device.DataMapMtx.unlock(); } - TrlTblMtx.unlock(); + TrlTblMtx->unlock(); if (rc != OFFLOAD_SUCCESS) { Device.PendingGlobalsMtx.unlock(); @@ -299,7 +299,7 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { bool copy = false; - if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || HasCloseModifier) { if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) { copy = true; @@ -401,7 +401,7 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; bool CopyMember = false; - if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + if (!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || HasCloseModifier) { if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { @@ -416,7 +416,7 @@ } if ((DelEntry || Always || CopyMember) && - !(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + !(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && TgtPtrBegin == HstPtrBegin)) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); @@ -499,7 +499,7 @@ continue; } - if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && TgtPtrBegin == HstPtrBegin) { DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", DPxPTR(HstPtrBegin)); @@ -590,14 +590,14 @@ // Find the table information in the map or look it up in the translation // tables. TableMap *TM = 0; - TblMapMtx.lock(); - HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap.find(host_ptr); - if (TableMapIt == HostPtrToTableMap.end()) { + TblMapMtx->lock(); + HostPtrToTableMapTy::iterator TableMapIt = HostPtrToTableMap->find(host_ptr); + if (TableMapIt == HostPtrToTableMap->end()) { // We don't have a map. So search all the registered libraries. - TrlTblMtx.lock(); + TrlTblMtx->lock(); for (HostEntriesBeginToTransTableTy::iterator - ii = HostEntriesBeginToTransTable.begin(), - ie = HostEntriesBeginToTransTable.end(); + ii = HostEntriesBeginToTransTable->begin(), + ie = HostEntriesBeginToTransTable->end(); !TM && ii != ie; ++ii) { // get the translation table (which contains all the good info). TranslationTable *TransTable = &ii->second; @@ -611,17 +611,17 @@ continue; // we got a match, now fill the HostPtrToTableMap so that we // may avoid this search next time. - TM = &HostPtrToTableMap[host_ptr]; + TM = &(*HostPtrToTableMap)[host_ptr]; TM->Table = TransTable; TM->Index = i; break; } } - TrlTblMtx.unlock(); + TrlTblMtx->unlock(); } else { TM = &TableMapIt->second; } - TblMapMtx.unlock(); + TblMapMtx->unlock(); // No map for this host pointer found! if (!TM) { @@ -631,11 +631,11 @@ } // get target table. - TrlTblMtx.lock(); + TrlTblMtx->lock(); assert(TM->Table->TargetsTable.size() > (size_t)device_id && "Not expecting a device ID outside the table's bounds!"); __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id]; - TrlTblMtx.unlock(); + TrlTblMtx->unlock(); assert(TargetTable && "Global data has not been mapped\n"); // Move data to device. @@ -682,7 +682,7 @@ DPxPTR(HstPtrVal)); continue; } - if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && TgtPtrBegin == HstPtrBegin) { DP("Unified memory is active, no need to map lambda captured" "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); @@ -765,14 +765,14 @@ // Pop loop trip count uint64_t ltc = 0; - TblMapMtx.lock(); + TblMapMtx->lock(); auto I = Device.LoopTripCnt.find(__kmpc_global_thread_num(NULL)); if (I != Device.LoopTripCnt.end()) { ltc = I->second; Device.LoopTripCnt.erase(I); DP("loop trip count is %lu.\n", ltc); } - TblMapMtx.unlock(); + TblMapMtx->unlock(); // Launch device execution. DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h --- a/openmp/libomptarget/src/rtl.h +++ b/openmp/libomptarget/src/rtl.h @@ -134,8 +134,8 @@ // Unregister a shared library from all RTLs. void UnregisterLib(__tgt_bin_desc *desc); }; -extern RTLsTy RTLs; -extern std::mutex RTLsMtx; +extern RTLsTy *RTLs; +extern std::mutex *RTLsMtx; /// Map between the host entry begin and the translation table. Each @@ -153,8 +153,8 @@ }; typedef std::map<__tgt_offload_entry *, TranslationTable> HostEntriesBeginToTransTableTy; -extern HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; -extern std::mutex TrlTblMtx; +extern HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable; +extern std::mutex *TrlTblMtx; /// Map between the host ptr and a table index struct TableMap { @@ -165,7 +165,7 @@ : Table(table), Index(index) {} }; typedef std::map HostPtrToTableMapTy; -extern HostPtrToTableMapTy HostPtrToTableMap; -extern std::mutex TblMapMtx; +extern HostPtrToTableMapTy *HostPtrToTableMap; +extern std::mutex *TblMapMtx; #endif diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -28,14 +28,34 @@ /* CUDA target */ "libomptarget.rtl.cuda.so", /* AArch64 target */ "libomptarget.rtl.aarch64.so"}; -RTLsTy RTLs; -std::mutex RTLsMtx; - -HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; -std::mutex TrlTblMtx; +RTLsTy *RTLs; +std::mutex *RTLsMtx; + +HostEntriesBeginToTransTableTy *HostEntriesBeginToTransTable; +std::mutex *TrlTblMtx; + +HostPtrToTableMapTy *HostPtrToTableMap; +std::mutex *TblMapMtx; + +__attribute__((constructor(0))) void init() { + DP("Init target library!\n"); + RTLs = new RTLsTy(); + RTLsMtx = new std::mutex(); + HostEntriesBeginToTransTable = new HostEntriesBeginToTransTableTy(); + TrlTblMtx = new std::mutex(); + HostPtrToTableMap = new HostPtrToTableMapTy(); + TblMapMtx = new std::mutex(); +} -HostPtrToTableMapTy HostPtrToTableMap; -std::mutex TblMapMtx; +__attribute__((destructor(0))) void deinit() { + DP("Deinit target library!\n"); + delete RTLs; + delete RTLsMtx; + delete HostEntriesBeginToTransTable; + delete TrlTblMtx; + delete HostPtrToTableMap; + delete TblMapMtx; +} void RTLsTy::LoadRTLs() { #ifdef OMPTARGET_DEBUG @@ -234,7 +254,7 @@ // Attempt to load all plugins available in the system. std::call_once(initFlag, &RTLsTy::LoadRTLs, this); - RTLsMtx.lock(); + RTLsMtx->lock(); // Register the images with the RTLs that understand them, if any. for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { // Obtain the image. @@ -244,7 +264,7 @@ // Scan the RTLs that have associated images until we find one that supports // the current image. - for (auto &R : RTLs.AllRTLs) { + for (auto &R : AllRTLs) { if (!R.is_valid_binary(img)) { DP("Image " DPxMOD " is NOT compatible with RTL %s!\n", DPxPTR(img->ImageStart), R.RTLName.c_str()); @@ -269,35 +289,34 @@ } // Initialize the index of this RTL and save it in the used RTLs. - R.Idx = (RTLs.UsedRTLs.empty()) + R.Idx = (UsedRTLs.empty()) ? 0 - : RTLs.UsedRTLs.back()->Idx + - RTLs.UsedRTLs.back()->NumberOfDevices; + : UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices; assert((size_t) R.Idx == start && "RTL index should equal the number of devices used so far."); R.isUsed = true; - RTLs.UsedRTLs.push_back(&R); + UsedRTLs.push_back(&R); DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx); } // Initialize (if necessary) translation table for this library. - TrlTblMtx.lock(); - if(!HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)){ + TrlTblMtx->lock(); + if(!HostEntriesBeginToTransTable->count(desc->HostEntriesBegin)){ TranslationTable &tt = - HostEntriesBeginToTransTable[desc->HostEntriesBegin]; + (*HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; tt.HostTable.EntriesBegin = desc->HostEntriesBegin; tt.HostTable.EntriesEnd = desc->HostEntriesEnd; } // Retrieve translation table for this library. TranslationTable &TransTable = - HostEntriesBeginToTransTable[desc->HostEntriesBegin]; + (*HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(img->ImageStart), R.RTLName.c_str()); RegisterImageIntoTranslationTable(TransTable, R, img); - TrlTblMtx.unlock(); + TrlTblMtx->unlock(); FoundRTL = &R; // Load ctors/dtors for static objects @@ -311,7 +330,7 @@ DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart)); } } - RTLsMtx.unlock(); + RTLsMtx->unlock(); DP("Done registering entries!\n"); @@ -320,7 +339,7 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) { DP("Unloading target library!\n"); - RTLsMtx.lock(); + RTLsMtx->lock(); // Find which RTL understands each image, if any. for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { // Obtain the image. @@ -330,7 +349,7 @@ // Scan the RTLs that have associated images until we find one that supports // the current image. We only need to scan RTLs that are already being used. - for (auto *R : RTLs.UsedRTLs) { + for (auto *R : UsedRTLs) { assert(R->isUsed && "Expecting used RTLs."); @@ -376,28 +395,28 @@ DPxPTR(img->ImageStart)); } } - RTLsMtx.unlock(); + RTLsMtx->unlock(); DP("Done unregistering images!\n"); // Remove entries from HostPtrToTableMap - TblMapMtx.lock(); + TblMapMtx->lock(); for (__tgt_offload_entry *cur = desc->HostEntriesBegin; cur < desc->HostEntriesEnd; ++cur) { - HostPtrToTableMap.erase(cur->addr); + HostPtrToTableMap->erase(cur->addr); } // Remove translation table for this descriptor. - auto tt = HostEntriesBeginToTransTable.find(desc->HostEntriesBegin); - if (tt != HostEntriesBeginToTransTable.end()) { + auto tt = HostEntriesBeginToTransTable->find(desc->HostEntriesBegin); + if (tt != HostEntriesBeginToTransTable->end()) { DP("Removing translation table for descriptor " DPxMOD "\n", DPxPTR(desc->HostEntriesBegin)); - HostEntriesBeginToTransTable.erase(tt); + HostEntriesBeginToTransTable->erase(tt); } else { DP("Translation table for descriptor " DPxMOD " cannot be found, probably " "it has been already removed.\n", DPxPTR(desc->HostEntriesBegin)); } - TblMapMtx.unlock(); + TblMapMtx->unlock(); // TODO: Remove RTL and the devices it manages if it's not used anymore? // TODO: Write some RTL->unload_image(...) function? diff --git a/openmp/libomptarget/test/offloading/dynamic_module_load.c b/openmp/libomptarget/test/offloading/dynamic_module_load.c new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/offloading/dynamic_module_load.c @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-aarch64-unknown-linux-gnu -ldl && %libomptarget-run-aarch64-unknown-linux-gnu %t.so 2>&1 | %fcheck-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-powerpc64-ibm-linux-gnu -ldl && %libomptarget-run-powerpc64-ibm-linux-gnu %t.so 2>&1 | %fcheck-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-powerpc64le-ibm-linux-gnu -ldl && %libomptarget-run-powerpc64le-ibm-linux-gnu %t.so 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-x86_64-pc-linux-gnu -DSHARED -shared -o %t.so && %clang %flags %s -o %t-x86_64-pc-linux-gnu -ldl && %libomptarget-run-x86_64-pc-linux-gnu %t.so 2>&1 | %fcheck-x86_64-pc-linux-gnu + +#ifdef SHARED +#include +int foo() { +#pragma omp target + ; + printf("%s\n", "DONE."); + return 0; +} +#else +#include +#include +int main(int argc, char **argv) { + void *Handle = dlopen(argv[1], RTLD_NOW); + int (*Foo)(void); + + if (Handle == NULL) { + printf("dlopen() failed: %s\n", dlerror()); + return 1; + } + Foo = (int (*)(void)) dlsym(Handle, "foo"); + if (Handle == NULL) { + printf("dlsym() failed: %s\n", dlerror()); + return 1; + } + // CHECK: DONE. + // CHECK-NOT: {{abort|fault}} + return Foo(); +} +#endif